Implemented initial Federated Content Search Query Language (FCSQL)
serialization (term query).
Change-Id: I5da3916a785f854c2760c76a92d27bddcc3e0b03
diff --git a/.settings/.gitignore b/.settings/.gitignore
new file mode 100644
index 0000000..0450880
--- /dev/null
+++ b/.settings/.gitignore
@@ -0,0 +1,2 @@
+/org.eclipse.m2e.core.prefs
+/org.eclipse.wst.common.project.facet.core.xml
diff --git a/pom.xml b/pom.xml
index 8f8a82d..28bfabf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
- <version>4.2</version>
+ <version>4.5.1</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
@@ -94,6 +94,26 @@
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.5</version>
</dependency>
+ <dependency>
+ <groupId>eu.clarin.sru.fcs</groupId>
+ <artifactId>fcs-simple-endpoint</artifactId>
+ <version>1.3.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-core</artifactId>
+ <version>5.2.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers-common</artifactId>
+ <version>5.2.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-queryparser</artifactId>
+ <version>5.2.1</version>
+ </dependency>
</dependencies>
<build>
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CqlQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CqlQueryProcessor.java
index b06731c..e2070bf 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CqlQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CqlQueryProcessor.java
@@ -22,16 +22,12 @@
private static final String INDEX_WORDS = "words";
private static final String TERM_RELATION_CQL_1_1 = "scr";
private static final String TERM_RELATION_CQL_1_2 = "=";
- private static final String SUPPORTED_RELATION_EXACT = "exact"; // not
- // in
- // the
- // doc
+ private static final String SUPPORTED_RELATION_EXACT = "exact"; // not in the doc
private static final String OPERATION_OR = "operation:or";
private static final String OPERATION_SEQUENCE = "operation:sequence";
private static final String OPERATION_POSITION = "operation:position";
private static final String KORAP_CONTEXT = "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld";
- private LinkedHashMap<String, Object> requestMap;
private String version;
private boolean isCaseSensitive; // default true
@@ -40,12 +36,10 @@
this(query, VERSION_1_2, true);
}
-
public CqlQueryProcessor (String query, String version) {
this(query, version, true);
}
-
public CqlQueryProcessor (String query, String version,
boolean isCaseSensitive) {
this.version = version;
@@ -55,13 +49,11 @@
process(query);
}
-
@Override
public Map<String, Object> getRequestMap () {
return this.requestMap;
}
-
@Override
public void process (String query) {
if ((query == null) || query.isEmpty())
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessor.java
new file mode 100644
index 0000000..7e0c199
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/FCSQLQueryProcessor.java
@@ -0,0 +1,352 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
+import eu.clarin.sru.server.SRUQueryBase;
+import eu.clarin.sru.server.SRUVersion;
+import eu.clarin.sru.server.fcs.Constants;
+import eu.clarin.sru.server.fcs.parser.Expression;
+import eu.clarin.sru.server.fcs.parser.Operator;
+import eu.clarin.sru.server.fcs.parser.QueryNode;
+import eu.clarin.sru.server.fcs.parser.QueryParser;
+import eu.clarin.sru.server.fcs.parser.QueryParserException;
+import eu.clarin.sru.server.fcs.parser.QuerySegment;
+import eu.clarin.sru.server.fcs.parser.RegexFlag;
+
+/**
+ * Serializes a Federated Content Search query (FCS-QL) into a KoralQuery
+ * request map. Only single term queries are serialized so far; every
+ * other construct is reported through {@code addError}.
+ */
+public class FCSQLQueryProcessor extends AbstractQueryProcessor {
+
+    /** Couples the raw FCS-QL query string with its parsed query tree. */
+    public static final class FCSQuery extends SRUQueryBase<QueryNode> {
+
+        private FCSQuery(String rawQuery, QueryNode parsedQuery) {
+            super(rawQuery, parsedQuery);
+        }
+
+        @Override
+        public String getQueryType() {
+            return Constants.FCS_QUERY_TYPE_FCS;
+        }
+    }
+
+    /** Annotation foundries accepted as layer qualifiers. */
+    public enum Foundry {
+        CNX, OPENNLP, TT, MATE, XIP;
+    }
+
+    private static final String KORAP_CONTEXT = "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld";
+    /**
+     * The only accepted SRU version, in the string form callers pass in.
+     * The version argument is checked against this constant because a
+     * {@code String} never equals the enum constant
+     * {@link SRUVersion#VERSION_2_0}.
+     */
+    private static final String SUPPORTED_VERSION = "2.0";
+    private String version;
+    private List<Foundry> supportedFoundries;
+    private final QueryParser parser = new QueryParser();
+
+    /**
+     * Validates the SRU version and serializes the given query.
+     *
+     * @param query the FCS-QL query string
+     * @param version the SRU version of the request; only "2.0" is accepted
+     */
+    public FCSQLQueryProcessor(String query, String version) {
+        if (version == null) {
+            addError(StatusCodes.MISSING_VERSION,
+                    "SRU Diagnostic 7: Version number is missing.");
+        } else if (!version.equals(SUPPORTED_VERSION)) {
+            addError(StatusCodes.MISSING_VERSION,
+                    "SRU Diagnostic 5: Only supports SRU version 2.0.");
+        }
+        this.version = version;
+
+        this.requestMap = new LinkedHashMap<>();
+        requestMap.put("@context", KORAP_CONTEXT);
+
+        this.supportedFoundries = new ArrayList<Foundry>(5);
+        supportedFoundries.add(Foundry.CNX);
+        supportedFoundries.add(Foundry.OPENNLP);
+        supportedFoundries.add(Foundry.TT);
+        supportedFoundries.add(Foundry.MATE);
+        supportedFoundries.add(Foundry.XIP);
+
+        process(query);
+    }
+
+    @Override
+    public Map<String, Object> getRequestMap() {
+        return this.requestMap;
+    }
+
+    /**
+     * Parses the query and stores its serialization under the "query" key
+     * of the request map. Nothing is stored when the query cannot be
+     * parsed; the reason has been recorded as an error by then.
+     *
+     * @param query the FCS-QL query string
+     */
+    @Override
+    public void process(String query) {
+        FCSQuery fcsQuery = parseQueryStringtoFCSQuery(query);
+        if (fcsQuery == null) {
+            // Parsing failed and was reported; there is no tree to walk.
+            return;
+        }
+        QueryNode fcsQueryNode = fcsQuery.getParsedQuery();
+        Map<String, Object> queryMap = parseFCSQuery(fcsQueryNode);
+        requestMap.put("query", queryMap);
+    }
+
+    /**
+     * Runs the FCS-QL parser on the query string.
+     *
+     * @return the parsed query, or {@code null} if the query is missing or
+     *         cannot be parsed (an error is recorded in both cases)
+     */
+    private FCSQuery parseQueryStringtoFCSQuery(String query) {
+        if ((query == null) || query.isEmpty()) {
+            addError(StatusCodes.MALFORMED_QUERY,
+                    "SRU diagnostic 1: No query has been passed.");
+            // Do not hand a missing query to the parser.
+            return null;
+        }
+        try {
+            QueryNode parsedQuery = parser.parse(query);
+            return new FCSQuery(query, parsedQuery);
+        } catch (QueryParserException e) {
+            addError(StatusCodes.UNKNOWN_QUERY_ERROR, "FCS Diagnostic 10: "
+                    + e.getMessage());
+        } catch (Exception e) {
+            addError(StatusCodes.UNKNOWN_QUERY_ERROR, "FCS Diagnostic 10: "
+                    + "Unexpected error while parsing query.");
+        }
+        return null;
+    }
+
+    /**
+     * Serializes the root of the parsed query and records a generic error
+     * when no serialization could be produced.
+     */
+    private Map<String, Object> parseFCSQuery(QueryNode queryNode) {
+        Map<String, Object> queryMap = parseQueryNode(queryNode);
+        if (queryMap == null) {
+            addError(StatusCodes.UNKNOWN_QUERY_ERROR, "SRU diagnostic 47:"
+                    + " Failed parsing query for unknown reasons.");
+        }
+        return queryMap;
+    }
+
+    /**
+     * Dispatches on the node type. Only {@link QuerySegment} is supported;
+     * any other node type is reported as too complex.
+     */
+    private Map<String, Object> parseQueryNode(QueryNode queryNode) {
+        if (queryNode instanceof QuerySegment) {
+            return parseQuerySegment((QuerySegment) queryNode);
+        }
+        // TODO: QueryGroup, QuerySequence, QueryDisjunction, QueryWithWithin
+        addError(StatusCodes.QUERY_TOO_COMPLEX, "FCS diagnostic 11:"
+                + queryNode.getNodeType().name()
+                + " is currently unsupported.");
+        return null;
+    }
+
+    /**
+     * Serializes a segment that occurs exactly once; segments with any
+     * other occurrence range are reported as too complex.
+     */
+    private Map<String, Object> parseQuerySegment(QuerySegment segment) {
+        if ((segment.getMinOccurs() == 1) && (segment.getMaxOccurs() == 1)) {
+            return parseExpression(segment.getExpression());
+        }
+        addError(StatusCodes.QUERY_TOO_COMPLEX, "FCS diagnostic 11:"
+                + "Query is too complex.");
+        return null;
+    }
+
+    /**
+     * Serializes a simple {@link Expression}; any other expression node is
+     * reported as too complex.
+     */
+    private Map<String, Object> parseExpression(QueryNode queryNode) {
+        if (queryNode instanceof Expression) {
+            return parseLayer((Expression) queryNode);
+        }
+        // TODO: ExpressionAnd, ExpressionGroup, ExpressionNot, ExpressionOr,
+        // ExpressionWildcard
+        addError(StatusCodes.QUERY_TOO_COMPLEX, "FCS diagnostic 11:"
+                + "Query is too complex.");
+        return null;
+    }
+
+    /**
+     * Builds the token for one layer expression.
+     *
+     * @return the token map, or {@code null} if the layer identifier could
+     *         not be mapped to a KorAP layer
+     */
+    private Map<String, Object> parseLayer(Expression expression) {
+        String layer = parseLayerIdentifier(expression.getLayerIdentifier());
+        if (layer == null) {
+            // The default foundry depends on the layer, so stop here.
+            return null;
+        }
+        String foundry = parseQualifier(expression.getLayerQualifier(), layer);
+        String operator = parseOperator(expression.getOperator());
+        boolean isCaseSensitive = parseRegexFlags(expression.getRegexFlags());
+        String term = expression.getRegexValue();
+
+        return writeTerm(term, foundry, layer, operator, isCaseSensitive);
+    }
+
+    /**
+     * Maps an FCS-QL layer identifier to the KorAP layer name.
+     *
+     * @return "orth", "p" or "l", or {@code null} if the identifier is
+     *         missing or unsupported
+     */
+    private String parseLayerIdentifier(String identifier) {
+        String layer = null;
+        if (identifier == null) {
+            // TODO: report a missing layer identifier explicitly.
+        } else if (identifier.equals("text")) {
+            layer = "orth";
+        } else if (identifier.equals("pos")) {
+            layer = "p";
+        } else if (identifier.equals("lemma")) {
+            layer = "l";
+        } else {
+            addError(StatusCodes.UNKNOWN_QUERY_ELEMENT, "SRU diagnostic 48:"
+                    + identifier + " is unsupported.");
+        }
+        return layer;
+    }
+
+    /**
+     * Resolves the foundry of a layer expression.
+     *
+     * @param qualifier the layer qualifier of the query, may be null
+     * @param layer the KorAP layer name ("orth", "p" or "l")
+     * @return the qualifier, or the default foundry of the layer if the
+     *         query names none
+     */
+    private String parseQualifier(String qualifier, String layer) {
+        if (qualifier == null) {
+            // Set default foundry
+            if (layer.equals("orth")) {
+                qualifier = foundryName(Foundry.OPENNLP);
+            } else {
+                qualifier = foundryName(Foundry.TT);
+            }
+        } else if (qualifier.equals(foundryName(Foundry.OPENNLP))
+                && layer.equals("l")) {
+            // layer holds the KorAP name here, so lemma is "l"
+            addError(StatusCodes.UNKNOWN_QUERY_ELEMENT, "SRU diagnostic 48:"
+                    + "Layer lemma with qualifier opennlp is unsupported.");
+        } else if (!isSupportedFoundry(qualifier)) {
+            addError(StatusCodes.UNKNOWN_QUERY_ELEMENT, "SRU diagnostic 48:"
+                    + "Layer " + layer + " with qualifier " + qualifier
+                    + " is unsupported.");
+        }
+        return qualifier;
+    }
+
+    /** Returns the lower-case name under which a foundry is queried. */
+    private static String foundryName(Foundry foundry) {
+        return foundry.name().toLowerCase(Locale.ROOT);
+    }
+
+    /**
+     * Tells whether the qualifier names one of the supported foundries.
+     * The names are compared because the list holds {@link Foundry}
+     * constants, which never equal a {@code String}.
+     */
+    private boolean isSupportedFoundry(String qualifier) {
+        for (Foundry foundry : supportedFoundries) {
+            if (foundryName(foundry).equals(qualifier)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Maps the comparison operator to a KoralQuery match operator.
+     *
+     * @return "match:eq" or "match:ne", or {@code null} for any other
+     *         operator (an error is recorded)
+     */
+    private String parseOperator(Operator operator) {
+        String matchOperator = null;
+        if (operator == null || operator == Operator.EQUALS) {
+            matchOperator = "match:eq";
+        } else if (operator == Operator.NOT_EQUALS) {
+            matchOperator = "match:ne";
+        } else {
+            addError(StatusCodes.UNKNOWN_QUERY_ELEMENT, "SRU diagnostic 37:"
+                    + operator.name() + " is unsupported.");
+        }
+        return matchOperator;
+    }
+
+    /**
+     * Evaluates the regex flags of an expression.
+     *
+     * @return {@code false} if the case-insensitive flag is set,
+     *         {@code true} otherwise; other flags are recorded as errors
+     */
+    private boolean parseRegexFlags(Set<RegexFlag> set) {
+        // default case sensitive
+        boolean flag = true;
+        if (set != null) {
+            for (RegexFlag f : set) {
+                if (f == RegexFlag.CASE_SENSITVE) {
+                    continue;
+                } else if (f == RegexFlag.CASE_INSENSITVE) {
+                    flag = false;
+                } else {
+                    addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
+                            "SRU diagnostic 48:" + f.name()
+                                    + " is unsupported.");
+                }
+            }
+        }
+        return flag;
+    }
+
+    /**
+     * Wraps a term in a token.
+     *
+     * @return a "koral:token" map whose "wrap" entry is the "koral:term"
+     */
+    private Map<String, Object> writeTerm(String term, String foundry,
+            String layer, String operator, boolean isCaseSensitive) {
+        Map<String, Object> map = new LinkedHashMap<String, Object>();
+        map.put("@type", "koral:term");
+        if (!isCaseSensitive) {
+            map.put("caseInsensitive", "true");
+        }
+        map.put("key", term);
+        map.put("foundry", foundry);
+        map.put("layer", layer);
+        map.put("match", operator);
+
+        Map<String, Object> tokenMap = new LinkedHashMap<String, Object>();
+        tokenMap.put("@type", "koral:token");
+        tokenMap.put("wrap", map);
+        return tokenMap;
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
index 643eb08..b2cd727 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
@@ -16,7 +16,8 @@
* KoralQuery
*
* @author Joachim Bingel (bingel@ids-mannheim.de),
- * Michael Hanl (hanl@ids-mannheim.de)
+ * Michael Hanl (hanl@ids-mannheim.de),
+ * Eliza Margaretha (margaretha@ids-mannheim.de)
* @version 0.3.0
* @since 0.1.0
*/
@@ -111,6 +112,8 @@
ast = new PoliqarpPlusQueryProcessor(query);
}else if (queryLanguage.equalsIgnoreCase("cql")) {
ast = new CqlQueryProcessor(query);
+ } else if (queryLanguage.equalsIgnoreCase("fcsql")) {
+ ast = new FCSQLQueryProcessor(query, "2.0");
}else if (queryLanguage.equalsIgnoreCase("annis")) {
ast = new AnnisQueryProcessor(query);
}else {
@@ -134,14 +137,22 @@
}else if (ql.equalsIgnoreCase("poliqarpplus")) {
ast = new PoliqarpPlusQueryProcessor(query);
}else if (ql.equalsIgnoreCase("cql")) {
- if (version == null)
+ if (version == null) {
ast = new CqlQueryProcessor(query);
- else
+ } else {
ast = new CqlQueryProcessor(query, version);
- }else if (ql.equalsIgnoreCase("annis")) {
+ }
+ } else if (ql.equalsIgnoreCase("fcsql")) {
+ if (version == null) {
+ ast.addError(StatusCodes.MISSING_VERSION,
+ "SRU Version is missing!");
+ } else {
+ ast = new FCSQLQueryProcessor(query, version);
+ }
+ } else if (ql.equalsIgnoreCase("annis")) {
ast = new AnnisQueryProcessor(query);
}else {
- ast.addError(StatusCodes.UNKNOWN_QL,
+ ast.addError(StatusCodes.UNKNOWN_QUERY_LANGUAGE,
ql + " is not a supported query language!");
}
return this;
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
index 10614ea..1f896b3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java
@@ -7,7 +7,9 @@
public final static int INVALID_CLASS_REFERENCE = 304;
public final static int INCOMPATIBLE_OPERATOR_AND_OPERAND = 305;
public final static int UNKNOWN_QUERY_ELEMENT = 306;
- public final static int UNKNOWN_QL = 307;
+ public final static int UNKNOWN_QUERY_LANGUAGE = 307;
public final static int UNBOUND_ANNIS_RELATION = 308;
- public final static int UNKNOWN_QUERY_ERROR = 399;
-}
+ public final static int MISSING_VERSION = 309;
+ public final static int QUERY_TOO_COMPLEX = 310;
+ public final static int UNKNOWN_QUERY_ERROR = 399;
+}
\ No newline at end of file
diff --git a/src/test/java/de/ids_mannheim/korap/query/serialize/FcsqlQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/serialize/FcsqlQueryProcessorTest.java
new file mode 100644
index 0000000..729381e
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/query/serialize/FcsqlQueryProcessorTest.java
@@ -0,0 +1,90 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import static org.junit.Assert.assertEquals;
+
+
+import org.junit.Test;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+public class FcsqlQueryProcessorTest { // Serialization tests for single-term FCS-QL queries.
+ ObjectMapper mapper = new ObjectMapper(); // serializes the resulting query map to JSON
+
+ private void runAndValidate(String query, String jsonLD) // compares the serialized "query" entry with jsonLD
+ throws JsonProcessingException {
+ FCSQLQueryProcessor tree = new FCSQLQueryProcessor(query, "2.0");
+ String serializedQuery = mapper.writeValueAsString(tree.getRequestMap()
+ .get("query")); // NOTE(review): only the "query" entry is read; recorded errors are not checked here
+ assertEquals(jsonLD.replace(" ", ""), serializedQuery.replace("\"", "")); // spaces are dropped from the expectation, quotes from the JSON
+ }
+
+ @Test
+ public void testTermQuery() throws JsonProcessingException { // bare term: layer orth, foundry opennlp
+ String query = "\"Sonne\"";
+ String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:Sonne, "
+ + "foundry:opennlp, layer:orth, match:match:eq}}";
+ runAndValidate(query, jsonLd);
+ }
+
+ @Test
+ public void testTermQueryWithRegexFlag() throws JsonProcessingException { // the /c flag adds caseInsensitive:true
+ String query = "\"Fliegen\" /c";
+ String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, caseInsensitive:true, "
+ + "key:Fliegen, foundry:opennlp, layer:orth, match:match:eq}}";
+ runAndValidate(query, jsonLd);
+ }
+
+ @Test
+ public void testTermQueryWithSpecificLayer() throws JsonProcessingException { // text, lemma and pos map to orth, l and p
+ String query = "[text = \"Sonne\"]";
+ String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:Sonne, "
+ + "foundry:opennlp, layer:orth, match:match:eq}}";
+ runAndValidate(query, jsonLd);
+
+ query = "[lemma = \"sein\"]";
+ jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:sein, "
+ + "foundry:tt, layer:l, match:match:eq}}";
+ runAndValidate(query, jsonLd);
+
+ query = "[pos = \"NN\"]";
+ jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:NN, "
+ + "foundry:tt, layer:p, match:match:eq}}";
+ runAndValidate(query, jsonLd);
+ }
+
+ @Test
+ public void testTermQueryWithQualifier() throws JsonProcessingException { // an explicit qualifier becomes the foundry
+ String query = "[mate:lemma = \"sein\"]";
+ String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:sein, "
+ + "foundry:mate, layer:l, match:match:eq}}";
+ runAndValidate(query, jsonLd);
+
+ query = "[cnx:pos = \"N\"]";
+ jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:N, "
+ + "foundry:cnx, layer:p, match:match:eq}}";
+ runAndValidate(query, jsonLd);
+ }
+
+ @Test
+ public void testMatchOperation() throws JsonProcessingException { // != is serialized as match:ne
+ String query = "[cnx:pos != \"N\"]";
+ String jsonLd = "{@type:koral:token, wrap:{@type:koral:term, key:N, "
+ + "foundry:cnx, layer:p, match:match:ne}}";
+ runAndValidate(query, jsonLd);
+ }
+
+ // @Test
+ // public void testSequenceQuery() throws JsonProcessingException {
+ // String query = "\"blaue\" [pos = \"NN\"]";
+ // String jsonLd =
+ // "{@type:koral:group, operation:operation:sequence, operands:["
+ // +
+ // "{@type:koral:token, wrap:{@type:koral:term, key:blaue, foundry:opennlp, layer:orth, match:match:eq}},"
+ // +
+ // "{@type:koral:token, wrap:{@type:koral:term, key:NN, foundry:tt, layer:p, match:match:eq}}"
+ // + "]}";
+ // runAndValidate(query, jsonLd);
+ // }
+
+}