Fixed attribute wrapping in cosmas #elem()
Change-Id: I1dce5f05ccaa68ac31aa007c263fe172f680592e
diff --git a/src/main/antlr/cosmas/c2ps_opELEM.g b/src/main/antlr/cosmas/c2ps_opELEM.g
index 5a6730c..a5b286c 100644
--- a/src/main/antlr/cosmas/c2ps_opELEM.g
+++ b/src/main/antlr/cosmas/c2ps_opELEM.g
@@ -29,7 +29,7 @@
// remove '#' from ID to avoid #ELEM(C) being tokenized as an ID;
// stating '#' should not start an ID has no effect in ANTLR.
// ID may contain an escaped ', e.g. l\'été.
-ID : (~('#'|'\''|' '|'='|'!'|'<'|'>'|')') | ('\\' '\''))+;
+ID : (~('#'|'\''|' '|'='|'!'|'<'|'>'|')') | ('\\' '\''))+;
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
//
@@ -49,4 +49,4 @@
op : '=' -> ^(EQ)
| ('<>' | '!=') -> ^(NOTEQ);
-
\ No newline at end of file
+
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
index fbf63ac..0d5980a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
@@ -25,6 +25,7 @@
* Map representation of CosmasII syntax tree as returned by ANTLR
*
* @author Joachim Bingel (bingel@ids-mannheim.de)
+ * @author Nils Diewald (diewald@ids-mannheim.de)
* @version 0.3
*/
public class Cosmas2QueryProcessor extends Antlr3AbstractQueryProcessor {
@@ -708,6 +709,7 @@
}
+ // TODO: The handling of attributes vs. element names is somewhat disputable ...
@SuppressWarnings("unchecked")
private void processOPELEM (Tree node) {
// Step I: create element
@@ -722,11 +724,35 @@
else {
int elname = 0;
Tree elnameNode = getFirstChildWithCat(node, "ELNAME");
+ /*
+ // TODO: This is identical to processOPMORPH
+ String wordOrRegex = "\\w+|\".+?\"";
+ Pattern p = Pattern.compile("((\\w+)/)?((\\w*)(!?=))?(" + wordOrRegex
+ + ")(:(" + wordOrRegex + "))?");
+ */
+
if (elnameNode != null) {
+ /*
span.put("key", elnameNode.getChild(0).toStringTree()
.toLowerCase());
+ */
+ LinkedHashMap<String, Object> fm =
+ termToFieldMap(elnameNode.getChild(0).toStringTree());
+
+ if (fm == null)
+ return;
+
+ // Workaround for things like #ELEM(S) to become #ELEM(s)
+ if (fm.get("foundry") == null &&
+ fm.get("layer") == null &&
+ fm.get("key") != null) {
+ fm.put("key", fm.get("key").toString().toLowerCase());
+ };
+ span.put("wrap", fm);
elname = 1;
+
}
+
if (node.getChildCount() > elname) {
/*
* Attributes can carry several values, like #ELEM(W
@@ -801,7 +827,14 @@
termGroup = (LinkedHashMap<String, Object>) termGroupOperands
.get(0);
}
- span.put("attr", termGroup);
+
+ // TODO: Without an element name the term group is stored as "wrap" instead of "attr"; this should be improved ...
+ if (elname == 0) {
+ span.put("wrap", termGroup);
+ }
+ else {
+ span.put("attr", termGroup);
+ }
}
}
// Step II: decide where to put
@@ -816,61 +849,21 @@
LinkedHashMap<String, Object> token = KoralObjectGenerator.makeToken();
ArrayList<Object> terms = new ArrayList<Object>();
LinkedHashMap<String, Object> fieldMap = null;
- // regex group #2 is foundry, #4 layer, #5 operator,
- // #6 key, #8 value
- String wordOrRegex = "\\w+|\".+?\"";
- Pattern p = Pattern.compile("((\\w+)/)?((\\w*)(!?=))?(" + wordOrRegex
- + ")(:(" + wordOrRegex + "))?");
- Matcher m;
+
for (String morphterm : morphterms) {
- m = p.matcher(morphterm);
- if (!m.matches()) {
- addError(StatusCodes.INCOMPATIBLE_OPERATOR_AND_OPERAND,
- "Something went wrong parsing the argument in MORPH().");
- requestMap.put("query", new LinkedHashMap<String, Object>());
- return;
- }
- fieldMap = KoralObjectGenerator.makeTerm();
-
- if (m.group(2) != null)
- fieldMap.put("foundry", m.group(2));
- if (m.group(4) != null)
- fieldMap.put("layer", m.group(4));
- if (m.group(5) != null) {
- if ("!=".equals(m.group(5)))
- negate = !negate;
- }
- if (m.group(6) != null) {
- String key = m.group(6);
- if (key.startsWith("\"") && key.endsWith("\"")) {
- key = key.substring(1, key.length() - 1);
- fieldMap.put("type", "type:regex");
- }
- fieldMap.put("key", key);
- }
-
- if (m.group(8) != null) {
- String value = m.group(8);
- if (value.startsWith("\"") && value.endsWith("\"")) {
- value = value.substring(1, value.length() - 1);
- fieldMap.put("type", "type:regex");
- }
- fieldMap.put("value", value);
- }
-
- // negate field (see above)
- if (negate) {
- fieldMap.put("match", "match:ne");
- }
- else {
- fieldMap.put("match", "match:eq");
- }
+ fieldMap = termToFieldMap(morphterm);
+ if (fieldMap == null) {
+ return;
+ };
+
terms.add(fieldMap);
}
+
if (morphterms.length == 1) {
token.put("wrap", fieldMap);
}
+
else {
LinkedHashMap<String, Object> termGroup = KoralObjectGenerator
.makeTermGroup("and");
@@ -1310,7 +1303,64 @@
return rewrittenQuery;
}
+ private LinkedHashMap<String, Object> termToFieldMap (String term) {
+ // regex group #2 is foundry, #4 layer, #5 operator,
+ // #6 key, #8 value
+ String wordOrRegex = "\\w+|\".+?\"";
+ // TODO: The pattern should be compiled once (e.g. as a static final constant) instead of on every call
+ Pattern p = Pattern.compile("((\\w+)/)?((\\w*)(!?=))?(" + wordOrRegex
+ + ")(:(" + wordOrRegex + "))?");
+ Matcher m;
+
+ m = p.matcher(term);
+ if (!m.matches()) {
+ addError(StatusCodes.INCOMPATIBLE_OPERATOR_AND_OPERAND,
+ "Something went wrong parsing the argument in MORPH() or #ELEM().");
+ requestMap.put("query", new LinkedHashMap<String, Object>());
+ return null;
+ };
+
+ LinkedHashMap<String, Object> fieldMap = null;
+ fieldMap = KoralObjectGenerator.makeTerm();
+
+ if (m.group(2) != null)
+ fieldMap.put("foundry", m.group(2));
+ if (m.group(4) != null)
+ fieldMap.put("layer", m.group(4));
+ if (m.group(5) != null) {
+ if ("!=".equals(m.group(5)))
+ negate = !negate;
+ }
+ if (m.group(6) != null) {
+ String key = m.group(6);
+ if (key.startsWith("\"") && key.endsWith("\"")) {
+ key = key.substring(1, key.length() - 1);
+ fieldMap.put("type", "type:regex");
+ }
+ fieldMap.put("key", key);
+ }
+
+ if (m.group(8) != null) {
+ String value = m.group(8);
+ if (value.startsWith("\"") && value.endsWith("\"")) {
+ value = value.substring(1, value.length() - 1);
+ fieldMap.put("type", "type:regex");
+ }
+ fieldMap.put("value", value);
+ }
+
+ // set the match operator from the negate flag (toggled above when the operator is "!=")
+ if (negate) {
+ fieldMap.put("match", "match:ne");
+ }
+ else {
+ fieldMap.put("match", "match:eq");
+ };
+ return fieldMap;
+ };
+
+
private Tree parseCosmasQuery (String query) {
query = rewritePositionQuery(query);
Tree tree = null;
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/test/Cosmas2QueryProcessorTest.java
index 5b43fd2..6d6d59c 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/Cosmas2QueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/Cosmas2QueryProcessorTest.java
@@ -217,13 +217,20 @@
assertEquals("lang", res.at("/query/operands/3/wrap/key").asText());
assertEquals(true, res.at("/query/operands/4").isMissingNode());
+ query = "#ELEM(s)";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+ assertEquals("koral:span", res.at("/query/@type").asText());
+ assertEquals("s", res.at("/query/wrap/key").asText());
+ assertEquals(true, res.at("/query/key").isMissingNode());
+
query = "der #ELEM(W)";
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertEquals("koral:group", res.at("/query/@type").asText());
assertEquals("operation:sequence", res.at("/query/operation").asText());
assertEquals("der", res.at("/query/operands/0/wrap/key").asText());
- assertEquals("w", res.at("/query/operands/1/key").asText());
+ assertEquals("w", res.at("/query/operands/1/wrap/key").asText());
assertEquals("koral:span", res.at("/query/operands/1/@type").asText());
assertEquals(true, res.at("/query/operands/2").isMissingNode());
@@ -233,7 +240,7 @@
assertEquals("koral:group", res.at("/query/@type").asText());
assertEquals("operation:sequence", res.at("/query/operation").asText());
assertEquals("der", res.at("/query/operands/0/wrap/key").asText());
- assertEquals("w", res.at("/query/operands/1/key").asText());
+ assertEquals("w", res.at("/query/operands/1/wrap/key").asText());
assertEquals("koral:span", res.at("/query/operands/1/@type").asText());
assertEquals("Mann", res.at("/query/operands/2/wrap/key").asText());
assertEquals(true, res.at("/query/operands/3").isMissingNode());
@@ -1153,13 +1160,22 @@
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertEquals("koral:span", res.at("/query/@type").asText());
- assertEquals("s", res.at("/query/key").asText());
+ assertEquals("s", res.at("/query/wrap/key").asText());
+
+ query = "#ELEM(base/c=NP)";
+ qs.setQuery(query, "cosmas2");
+ res = mapper.readTree(qs.toJSON());
+ assertEquals("koral:span", res.at("/query/@type").asText());
+ assertEquals("base", res.at("/query/wrap/foundry").asText());
+ assertEquals("c", res.at("/query/wrap/layer").asText());
+ assertEquals("NP", res.at("/query/wrap/key").asText());
query = "#ELEM(W ANA=N)";
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertEquals("koral:span", res.at("/query/@type").asText());
- assertEquals("w", res.at("/query/key").asText());
+ assertEquals("w", res.at("/query/wrap/key").asText());
+
assertEquals("koral:term", res.at("/query/attr/@type").asText());
assertEquals("N", res.at("/query/attr/key").asText());
assertEquals("p", res.at("/query/attr/layer").asText());
@@ -1169,7 +1185,7 @@
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertEquals("koral:span", res.at("/query/@type").asText());
- assertEquals("w", res.at("/query/key").asText());
+ assertEquals("w", res.at("/query/wrap/key").asText());
assertEquals("koral:termGroup", res.at("/query/attr/@type").asText());
assertEquals("relation:and", res.at("/query/attr/relation").asText());
assertEquals("koral:term", res.at("/query/attr/operands/0/@type")
@@ -1189,7 +1205,7 @@
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertEquals("koral:span", res.at("/query/@type").asText());
- assertEquals("w", res.at("/query/key").asText());
+ assertEquals("w", res.at("/query/wrap/key").asText());
assertEquals("koral:termGroup", res.at("/query/attr/@type").asText());
assertEquals("relation:and", res.at("/query/attr/relation").asText());
assertEquals("koral:termGroup", res.at("/query/attr/operands/0/@type")
@@ -1209,7 +1225,7 @@
qs.setQuery(query, "cosmas2");
res = mapper.readTree(qs.toJSON());
assertEquals("koral:span", res.at("/query/@type").asText());
- assertEquals("w", res.at("/query/key").asText());
+ assertEquals("w", res.at("/query/wrap/key").asText());
assertEquals("koral:termGroup", res.at("/query/attr/@type").asText());
assertEquals("relation:and", res.at("/query/attr/relation").asText());
assertEquals("koral:termGroup", res.at("/query/attr/operands/0/@type")