blob: c011d6e16e166a962f135fbd8d46b2e4b4810f58 [file] [log] [blame]
package de.ids_mannheim.korap;
import de.ids_mannheim.korap.query.wrap.*;
import de.ids_mannheim.korap.util.QueryException;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.automaton.RegExp;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import java.util.*;
import java.io.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/*
TODO: Create Pre-filter while preparing a Query.
The pre-filter will contain a boolean query with all
necessary terms, supporting boolean OR, ignoring
negation terms (and negation subqueries), like
[base=Der]([base=alte]|[base=junge])[base=Mann & p!=ADJA]![base=war | base=lag]
Search for all documents containing "s:Der" and ("s:alte" or "s:junge") and "s:Mann"
*/
/**
 * @author Nils Diewald
 *
 * KorapQuery implements a simple API for wrapping
 * KorAP Lucene Index specific query classes.
 *
 * Besides the builder shortcuts (re, wc, seg, or, seq, tag, within, _, shrink)
 * it deserializes JSON-LD query trees into the wrapper classes via fromJSON.
 * All wrappers created by one instance are bound to the index field passed
 * to the constructor.
 */
public class KorapQuery {
    private final String field;
    private final ObjectMapper json;
    private String defaultFoundry = "mate/";

    // Logger
    private final static Logger log = LoggerFactory.getLogger(KorapQuery.class);

    /**
     * Constructs a new base object for query generation.
     * @param field The specific index field for the query.
     */
    public KorapQuery (String field) {
        this.field = field;
        this.json = new ObjectMapper();
    }

    /**
     * Deserialize a JSON-LD query string into a query wrapper.
     * If the top level object has no "@type" but a "query" member,
     * the "query" member is deserialized instead.
     * @param jsonString The serialized query.
     * @return The wrapper tree representing the query.
     * @throws QueryException if the string is null, can't be parsed,
     *         or does not describe a supported query.
     */
    public SpanQueryWrapperInterface fromJSON (String jsonString) throws QueryException {
        if (jsonString == null)
            throw new QueryException("No JSON query given");

        JsonNode json;
        try {
            json = this.json.readValue(jsonString, JsonNode.class);
        }
        catch (IOException e) {
            // NOTE(review): the cause is dropped here, as no QueryException
            // constructor accepting a Throwable is visible from this file.
            throw new QueryException(e.getMessage());
        }

        // Unwrap the query from an enclosing request object.
        // A null parse result is rejected by fromJSON(JsonNode) below.
        if (json != null && !json.has("@type") && json.has("query"))
            json = json.get("query");

        return this.fromJSON(json);
    }

    // http://fasterxml.github.io/jackson-databind/javadoc/2.2.0/com/fasterxml/jackson/databind/JsonNode.html
    // TODO: Exception messages are horrible!
    // TODO: Use the shortcuts implemented in this class instead of the wrapper constructors
    /**
     * Deserialize a JSON-LD query node into a query wrapper.
     * Supported "@type" values are "korap:group", "korap:token",
     * "korap:sequence" and "korap:element".
     * @param json The query node.
     * @return The wrapper tree representing the node.
     * @throws QueryException if the node is null, has no "@type", has an
     *         unknown "@type", or lacks members required by its type.
     */
    public SpanQueryWrapperInterface fromJSON (JsonNode json) throws QueryException {
        if (json == null || !json.has("@type")) {
            throw new QueryException("JSON-LD group has no @type attribute");
        }

        String type = json.get("@type").asText();

        switch (type) {
        case "korap:group":
            return this.groupFromJSON(json);

        case "korap:token":
            if (!json.has("@value"))
                throw new QueryException("Token needs a @value");
            return this._segFromJSON(json.get("@value"));

        case "korap:sequence":
            return this.sequenceFromJSON(json);

        case "korap:element":
            if (!json.has("@value"))
                throw new QueryException("Element needs a @value");
            // Elements are serialized as key=value, while tag() gets key:value
            return this.tag(json.get("@value").asText().replace('=', ':'));
        }

        throw new QueryException("Unknown serialized query type: " + type);
    }

    // Deserialize a "korap:group" node (class wrapping, "or", "position" or "shrink").
    private SpanQueryWrapperInterface groupFromJSON (JsonNode json) throws QueryException {

        // A group without a relation is only valid as a class wrapper
        if (!json.has("relation")) {
            if (json.has("class")) {
                return new SpanClassQueryWrapper(
                    this.fromJSON(operandAt(json, 0)),
                    json.get("class").asInt(0)
                );
            }
            throw new QueryException("Group needs a relation or a class");
        }

        String relation = json.get("relation").asText();

        if (!json.has("operands"))
            throw new QueryException("Operation needs operands");

        switch (relation) {

        // Alternation
        case "or":
            SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper(this.field);
            for (JsonNode operand : json.get("operands")) {
                alternation.or(this.fromJSON(operand));
            }
            if (json.has("class")) {
                return new SpanClassQueryWrapper(alternation, json.get("class").asInt(0));
            }
            return alternation;

        case "position":
            if (!json.has("position"))
                throw new QueryException("Operation needs position specification");

            // Unknown position values keep the flag at 0
            short flag = 0;
            switch (json.get("position").asText()) {
            case "startswith":
                flag = (short) 1;
                break;
            case "endswith":
                flag = (short) 2;
                break;
            case "match":
                flag = (short) 3;
                break;
            }

            // Make sure both operands exist before deserializing any of them
            JsonNode wrapping = operandAt(json, 0);
            JsonNode embedded = operandAt(json, 1);

            return new SpanWithinQueryWrapper(
                this.fromJSON(wrapping),
                this.fromJSON(embedded),
                flag
            );

        case "shrink":
            int number = 0;
            // temporary
            if (json.has("shrink"))
                number = json.get("shrink").asInt();
            return new SpanMatchModifyQueryWrapper(this.fromJSON(operandAt(json, 0)), number);
        }

        throw new QueryException("Unknown group relation");
    }

    // Deserialize a "korap:sequence" node, which needs an array of at least two operands.
    private SpanQueryWrapperInterface sequenceFromJSON (JsonNode json) throws QueryException {
        if (!json.has("operands"))
            throw new QueryException("SpanSequenceQuery needs operands");

        JsonNode operands = json.get("operands");
        if (!operands.isArray() || operands.size() < 2)
            throw new QueryException("SpanSequenceQuery needs operands");

        SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper(this.field);
        for (JsonNode operand : operands) {
            sequence.append(this.fromJSON(operand));
        }
        return sequence;
    }

    // Fetch the operand at the given index or fail with a QueryException
    // (JsonNode.get() would silently return null for a missing operand).
    private static JsonNode operandAt (JsonNode json, int index) throws QueryException {
        JsonNode operands = json.get("operands");
        if (operands == null || !operands.isArray())
            throw new QueryException("Operation needs operands");

        JsonNode operand = operands.get(index);
        if (operand == null)
            throw new QueryException("Operation needs at least " + (index + 1) + " operand(s)");

        return operand;
    }

    // Fetch the "operands" member of a segment group or fail with a QueryException.
    private static JsonNode segOperands (JsonNode json) throws QueryException {
        JsonNode operands = json.get("operands");
        if (operands == null)
            throw new QueryException("Operation needs operands");
        return operands;
    }

    // Deserialize the "@value" of a token, being either a single
    // "korap:term" or a "korap:group" combining terms with "and" or "or".
    private SpanQueryWrapperInterface _segFromJSON (JsonNode json) throws QueryException {
        if (json == null || !json.has("@type"))
            throw new QueryException("JSON-LD token has no @type attribute");

        String type = json.get("@type").asText();

        switch (type) {
        case "korap:term":
            return this._termFromJSON(json);

        case "korap:group":
            if (!json.has("relation"))
                throw new QueryException("Token group needs a relation");

            switch (json.get("relation").asText()) {
            case "and":
                SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper(this.field);
                for (JsonNode operand : segOperands(json)) {
                    SpanQueryWrapperInterface part = this._segFromJSON(operand);

                    // with() is overloaded per wrapper class, so dispatch on the runtime type
                    if (part instanceof SpanAlterQueryWrapper) {
                        segment.with((SpanAlterQueryWrapper) part);
                    }
                    else if (part instanceof SpanRegexQueryWrapper) {
                        segment.with((SpanRegexQueryWrapper) part);
                    }
                    else if (part instanceof SpanSegmentQueryWrapper) {
                        segment.with((SpanSegmentQueryWrapper) part);
                    }
                    else {
                        throw new QueryException("Object not supported in segment queries");
                    }
                }
                return segment;

            case "or":
                SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper(this.field);
                for (JsonNode operand : segOperands(json)) {
                    alternation.or(this._segFromJSON(operand));
                }
                return alternation;
            }
            // Unknown relations are reported as unknown token types below
        }

        throw new QueryException("Unknown token type");
    }

    // Deserialize a "korap:term" node into a segment query.
    private SpanQueryWrapperInterface _termFromJSON (JsonNode json) throws QueryException {
        if (!json.has("relation"))
            throw new QueryException("Term needs a relation");

        switch (json.get("relation").asText()) {
        case "=":
            if (!json.has("@value"))
                throw new QueryException("Term needs a @value");

            // Map the serialized layer prefixes to the index prefixes
            String value = json.get("@value").asText()
                .replaceFirst("base:", defaultFoundry + "l:")
                .replaceFirst("orth:", "s:");

            if (json.has("@subtype") && "korap:regex".equals(json.get("@subtype").asText())) {

                // A quoted expression without a prefix addresses the surface form.
                // startsWith() is used to be safe on empty values.
                if (value.startsWith("'") || value.startsWith("\"")) {
                    value = "s:" + value;
                }
                value = value.replace("'", "").replace("\"", "");

                // Temporary
                value = value.replace("_", "/");
                return this.seg(this.re(value));
            }

            // Terms without a prefix address the surface form
            if (!value.matches("[^:]+?:.+"))
                value = "s:" + value;

            // Temporary
            value = value.replace("_", "/");
            return this.seg(value);

        case "!=":
            throw new QueryException("Term relation != not yet supported");
        }

        throw new QueryException("Unknown term relation");
    }

    // SpanRegexQueryWrapper
    /**
     * Create a query object based on a regular expression.
     * The expression is created with RegExp.ALL flags and is case sensitive.
     * @param re The regular expression as a string.
     */
    public SpanRegexQueryWrapper re (String re) {
        return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, false);
    }

    /**
     * Create a query object based on a regular expression.
     * @param re The regular expression as a string.
     * @param flags The regular expression flags as an integer.
     */
    public SpanRegexQueryWrapper re (String re, int flags) {
        return new SpanRegexQueryWrapper(this.field, re, flags, false);
    }

    /**
     * Create a query object based on a regular expression.
     * @param re The regular expression as a string.
     * @param flags The regular expression flags as an integer.
     * @param caseinsensitive A boolean value indicating case insensitivity.
     */
    public SpanRegexQueryWrapper re (String re, int flags, boolean caseinsensitive) {
        return new SpanRegexQueryWrapper(this.field, re, flags, caseinsensitive);
    }

    /**
     * Create a query object based on a regular expression.
     * The expression is created with RegExp.ALL flags.
     * @param re The regular expression as a string.
     * @param caseinsensitive A boolean value indicating case insensitivity.
     */
    public SpanRegexQueryWrapper re (String re, boolean caseinsensitive) {
        return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, caseinsensitive);
    }

    // SpanWildcardQueryWrapper
    /**
     * Create a case sensitive query object based on a wildcard term.
     * @param wc The wildcard term as a string.
     */
    public SpanWildcardQueryWrapper wc (String wc) {
        return new SpanWildcardQueryWrapper(this.field, wc, false);
    }

    /**
     * Create a query object based on a wildcard term.
     * @param wc The wildcard term as a string.
     * @param caseinsensitive A boolean value indicating case insensitivity.
     */
    public SpanWildcardQueryWrapper wc (String wc, boolean caseinsensitive) {
        return new SpanWildcardQueryWrapper(this.field, wc, caseinsensitive);
    }

    // SpanSegmentQueries
    /**
     * Create a segment query object.
     */
    public SpanSegmentQueryWrapper seg () {
        return new SpanSegmentQueryWrapper(this.field);
    }

    /**
     * Create a segment query object.
     * @param terms The regular expression terms the segment consists of.
     */
    public SpanSegmentQueryWrapper seg (SpanRegexQueryWrapper ... terms) {
        SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper(this.field);
        for (SpanRegexQueryWrapper term : terms) {
            segment.with(term);
        }
        return segment;
    }

    /**
     * Create a segment query object.
     * @param terms The alternations the segment consists of.
     */
    public SpanSegmentQueryWrapper seg (SpanAlterQueryWrapper ... terms) {
        SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper(this.field);
        for (SpanAlterQueryWrapper term : terms) {
            segment.with(term);
        }
        return segment;
    }

    /**
     * Create a segment query object.
     * @param terms The terms the segment consists of, as strings.
     */
    public SpanSegmentQueryWrapper seg (String ... terms) {
        SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper(this.field);
        for (String term : terms) {
            segment.with(term);
        }
        return segment;
    }

    // SpanSegmentAlterQueries
    /**
     * Create a segment alternation query object.
     * @param terms The alternative queries.
     */
    public SpanAlterQueryWrapper or (SpanQueryWrapperInterface ... terms) {
        SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper(this.field);
        for (SpanQueryWrapperInterface term : terms) {
            alternation.or(term);
        }
        return alternation;
    }

    /**
     * Create a segment alternation query object.
     * @param terms The alternative terms as strings.
     */
    public SpanAlterQueryWrapper or (String ... terms) {
        SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper(this.field);
        for (String term : terms) {
            alternation.or(term);
        }
        return alternation;
    }

    // SpanSegmentSequenceQueries
    /**
     * Create a sequence of segments query object.
     */
    public SpanSequenceQueryWrapper seq () {
        return new SpanSequenceQueryWrapper(this.field);
    }

    /**
     * Create a sequence of segments query object.
     * @param terms The queries the sequence consists of, in order.
     */
    public SpanSequenceQueryWrapper seq (SpanQueryWrapperInterface ... terms) {
        SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper(this.field);
        for (SpanQueryWrapperInterface term : terms) {
            sequence.append(term);
        }
        return sequence;
    }

    /**
     * Create a sequence of segments query object.
     * @param re A SpanRegexQueryWrapper, starting the sequence.
     */
    public SpanSequenceQueryWrapper seq (SpanRegexQueryWrapper re) {
        return new SpanSequenceQueryWrapper(this.field, re);
    }

    /**
     * Create a sequence of segments query object from arbitrary objects.
     * On the first element that is not an acceptable query wrapper
     * (including null) an error is logged and the sequence built
     * so far is returned.
     * @param terms The queries the sequence consists of, in order.
     */
    public SpanSequenceQueryWrapper seq (Object ... terms) {
        SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper(this.field);
        for (Object term : terms) {
            if (term instanceof SpanQueryWrapperInterface) {
                sequence.append((SpanQueryWrapperInterface) term);
            }
            else if (term instanceof SpanRegexQueryWrapper) {
                sequence.append((SpanRegexQueryWrapper) term);
            }
            else {
                // Null elements have no class to report
                log.error("{} is not an acceptable parameter for seq()",
                          term == null ? null : term.getClass());
                return sequence;
            }
        }
        return sequence;
    }

    /**
     * Create an element query object.
     * @param element The name of the element.
     */
    public SpanElementQueryWrapper tag (String element) {
        return new SpanElementQueryWrapper(this.field, element);
    }

    /**
     * Create a wrapping within query object.
     * @param element A SpanQuery.
     * @param embedded A SpanQuery that is wrapped in the element.
     */
    public SpanWithinQueryWrapper within (SpanQueryWrapperInterface element,
                                          SpanQueryWrapperInterface embedded) {
        return new SpanWithinQueryWrapper(element, embedded);
    }

    // Class
    /**
     * Wrap a query in a class query.
     * @param number The class number.
     * @param element The query to wrap.
     */
    public SpanClassQueryWrapper _ (byte number, SpanQueryWrapperInterface element) {
        return new SpanClassQueryWrapper(element, number);
    }

    /**
     * Wrap a query in a class query.
     * @param number The class number.
     * @param element The query to wrap.
     */
    public SpanClassQueryWrapper _ (int number, SpanQueryWrapperInterface element) {
        return new SpanClassQueryWrapper(element, number);
    }

    /**
     * Wrap a query in a class query.
     * @param number The class number.
     * @param element The query to wrap.
     */
    public SpanClassQueryWrapper _ (short number, SpanQueryWrapperInterface element) {
        return new SpanClassQueryWrapper(element, number);
    }

    /**
     * Wrap a query in a class query without giving a class number.
     * @param element The query to wrap.
     */
    public SpanClassQueryWrapper _ (SpanQueryWrapperInterface element) {
        return new SpanClassQueryWrapper(element);
    }

    // MatchModify
    /**
     * Wrap a query in a match modification query.
     * @param number The number passed to the match modification.
     * @param element The query to wrap.
     */
    public SpanMatchModifyQueryWrapper shrink (byte number, SpanQueryWrapperInterface element) {
        return new SpanMatchModifyQueryWrapper(element, number);
    }

    /**
     * Wrap a query in a match modification query.
     * @param number The number passed to the match modification.
     * @param element The query to wrap.
     */
    public SpanMatchModifyQueryWrapper shrink (int number, SpanQueryWrapperInterface element) {
        return new SpanMatchModifyQueryWrapper(element, number);
    }

    /**
     * Wrap a query in a match modification query.
     * @param number The number passed to the match modification.
     * @param element The query to wrap.
     */
    public SpanMatchModifyQueryWrapper shrink (short number, SpanQueryWrapperInterface element) {
        return new SpanMatchModifyQueryWrapper(element, number);
    }

    /**
     * Wrap a query in a match modification query without giving a number.
     * @param element The query to wrap.
     */
    public SpanMatchModifyQueryWrapper shrink (SpanQueryWrapperInterface element) {
        return new SpanMatchModifyQueryWrapper(element);
    }

    // split
}