blob: 1dc4a4b4f439f894c8f9b47ac81a6e5a083179b6 [file] [log] [blame]
package de.ids_mannheim.korap.query.parse.fcsql;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.hamcrest.core.IsNot;
import de.ids_mannheim.korap.query.elements.KoralTerm;
import de.ids_mannheim.korap.query.elements.KoralTerm.KoralTermType;
import de.ids_mannheim.korap.query.elements.KoralTermGroup;
import de.ids_mannheim.korap.query.elements.KoralToken;
import de.ids_mannheim.korap.query.elements.KoralRelation;
import de.ids_mannheim.korap.query.elements.KoralGroup.Distance;
import de.ids_mannheim.korap.query.serialize.FCSQLQueryProcessor;
import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
import eu.clarin.sru.server.fcs.parser.Expression;
import eu.clarin.sru.server.fcs.parser.ExpressionAnd;
import eu.clarin.sru.server.fcs.parser.ExpressionNot;
import eu.clarin.sru.server.fcs.parser.ExpressionOr;
import eu.clarin.sru.server.fcs.parser.Operator;
import eu.clarin.sru.server.fcs.parser.QueryNode;
import eu.clarin.sru.server.fcs.parser.RegexFlag;
public class ExpressionParser {
private static final String FOUNDRY_CNX = "cnx";
private static final String FOUNDRY_OPENNLP = "opennlp";
private static final String FOUNDRY_TT = "tt";
private static final String FOUNDRY_MATE = "mate";
private static final String FOUNDRY_XIP = "xip";
private List<String> supportedFoundries = Arrays
.asList(new String[] { FOUNDRY_CNX, FOUNDRY_OPENNLP, FOUNDRY_TT,
FOUNDRY_MATE, FOUNDRY_XIP });
private FCSQLQueryProcessor processor;
public ExpressionParser (FCSQLQueryProcessor processor) {
this.processor = processor;
}
public Object parseExpression(QueryNode queryNode) {
return parseExpression(queryNode, false, true);
}
public Object parseExpression(QueryNode queryNode, boolean isNot,
boolean isToken) {
if (queryNode instanceof Expression) {
return parseSimpleExpression((Expression) queryNode, isNot, isToken);
}
else if (queryNode instanceof ExpressionAnd) {
List<QueryNode> operands = queryNode.getChildren();
if (isNot) {
return parseBooleanExpression(operands, KoralRelation.OR);
}
else {
return parseBooleanExpression(operands, KoralRelation.AND);
}
}
// else if (queryNode instanceof ExpressionGroup) {
//
// }
else if (queryNode instanceof ExpressionNot) {
boolean negation = isNot ? false : true;
return parseExpression(queryNode.getChild(0), negation, isToken);
}
else if (queryNode instanceof ExpressionOr) {
List<QueryNode> operands = queryNode.getChildren();
if (isNot) {
return parseBooleanExpression(operands, KoralRelation.AND);
}
else {
return parseBooleanExpression(operands, KoralRelation.OR);
}
}
// else if (queryNode instanceof ExpressionWildcard) {
// for distance query, using empty token
// }
else {
processor.addError(StatusCodes.QUERY_TOO_COMPLEX,
"FCS diagnostic 11: Query is too complex.");
return null;
}
}
private Object parseBooleanExpression(List<QueryNode> operands,
KoralRelation relation) {
KoralTermGroup termGroup = new KoralTermGroup(this, relation, operands);
return new KoralToken(termGroup);
}
private Object parseSimpleExpression(Expression expression, boolean isNot,
boolean isToken) {
KoralTerm koralTerm = parseTerm(expression, isNot);
if (isToken) {
return new KoralToken(koralTerm);
}
else {
return koralTerm;
}
}
public KoralTerm parseTerm(Expression expression, boolean isNot) {
KoralTerm koralTerm = new KoralTerm();
koralTerm.setType(KoralTermType.REGEX);
koralTerm.setKey(expression.getRegexValue());
parseLayerIdentifier(koralTerm, expression.getLayerIdentifier());
parseQualifier(koralTerm, expression.getLayerQualifier());
parseOperator(koralTerm, expression.getOperator(), isNot);
parseRegexFlags(koralTerm, expression.getRegexFlags());
return koralTerm;
}
private void parseLayerIdentifier(KoralTerm koralTerm, String identifier) {
String layer = null;
if (identifier == null) {
processor.addError(StatusCodes.MALFORMED_QUERY,
"FCS diagnostic 10: Layer identifier is missing.");
koralTerm.setInvalid(true);
}
else if (identifier.equals("text")) {
layer = "orth";
}
else if (identifier.equals("pos")) {
layer = "p";
}
else if (identifier.equals("lemma")) {
layer = "l";
}
else {
processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
"SRU diagnostic 48: Layer " + identifier
+ " is unsupported.");
koralTerm.setInvalid(true);
}
koralTerm.setLayer(layer);
}
private void parseQualifier(KoralTerm koralTerm, String qualifier) {
String layer = koralTerm.getLayer();
if (layer == null) {
koralTerm.setInvalid(true);
return;
}
// Set default foundry
if (qualifier == null) {
if (layer.equals("orth")) {
qualifier = FOUNDRY_OPENNLP;
}
else {
qualifier = FOUNDRY_TT;
}
}
else if (qualifier.equals(FOUNDRY_OPENNLP) && layer.equals("l")) {
processor
.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
"SRU diagnostic 48: Layer lemma with qualifier opennlp is unsupported.");
koralTerm.setInvalid(true);
}
else if (!supportedFoundries.contains(qualifier)) {
processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
"SRU diagnostic 48: Qualifier " + qualifier
+ " is unsupported.");
koralTerm.setInvalid(true);
}
koralTerm.setFoundry(qualifier);
}
private void parseOperator(KoralTerm koralTerm, Operator operator,
boolean isNot) {
String matchOperator = null;
if (operator == null || operator == Operator.EQUALS) {
matchOperator = isNot ? "match:ne" : "match:eq";
}
else if (operator == Operator.NOT_EQUALS) {
matchOperator = isNot ? "match:eq" : "match:ne";
}
else {
processor
.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
"SRU diagnostic 37:" + operator.name()
+ " is unsupported.");
koralTerm.setInvalid(true);
}
koralTerm.setOperator(matchOperator);
}
private void parseRegexFlags(KoralTerm koralTerm, Set<RegexFlag> set) {
// default case sensitive
if (set != null) {
for (RegexFlag f : set) {
if (f == RegexFlag.CASE_SENSITVE) {
koralTerm.setCaseSensitive(true);
}
else if (f == RegexFlag.CASE_INSENSITVE) {
koralTerm.setCaseSensitive(false);
}
else {
processor.addError(StatusCodes.UNKNOWN_QUERY_ELEMENT,
"SRU diagnostic 48:" + f.name()
+ " is unsupported.");
koralTerm.setInvalid(true);
}
}
}
}
}