Fixed Annis OR group (resolved #96)
Change-Id: I495c02cee0350315550420b5aa14cbc37e7b5b50
diff --git a/.gitignore b/.gitignore
index 044bf9c..1f3d6a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -72,6 +72,8 @@
/src/main/java/de/ids_mannheim/korap/query/parse/poliqarpplus/PoliqarpPlusParser.tokens
/src/main/java/de/ids_mannheim/korap/query/parse/poliqarpplus/PoliqarpPlusLexer.java
+/src/main/java/de/ids_mannheim/korap/query/parse/cqp
+
# /src/main/java/de/ids_mannheim/korap/query/serialize/
/src/main/java/de/ids_mannheim/korap/query/serialize/SerializationSandbox.java
/lib/
diff --git a/Changes b/Changes
index abca7c0..95492f3 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,6 @@
-0.38.1 2022-01-03
+0.38.1 2022-01-07
- [security] Updated log4j (diewald)
+ - Fixed Annis OR group (resolved #96)
0.38 2021-12-10
- [security] Updated log4j due to CVE-2021-44228 (diewald)
diff --git a/src/main/antlr/annis/AqlLexer.g4 b/src/main/antlr/annis/AqlLexer.g4
index 5b9da30..8fda46b 100644
--- a/src/main/antlr/annis/AqlLexer.g4
+++ b/src/main/antlr/annis/AqlLexer.g4
@@ -80,8 +80,12 @@
fragment RE_star : (RE_char | RE_chgroup | ( '(' REGEX ')')) '*';
fragment RE_plus : (RE_char | RE_chgroup | ( '(' REGEX ')')) '+';
fragment RE_occ : (RE_char | RE_chgroup | ( '(' REGEX ')')) FOCC;
+fragment RE_expr : '.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant;
+fragment RE_orgroup : '(' RE_expr* ('|' RE_expr+ )* ')' ;
fragment RE_group : '(' REGEX ')';
-REGEX : SLASH ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)* SLASH;
+// Superseded by the RE_expr-based rule below: REGEX : SLASH ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)* SLASH;
+
+REGEX : SLASH (RE_expr | RE_group | RE_orgroup)* SLASH;
WS : ( ' ' | '\t' | '\r' | '\n' )+ -> skip ;
diff --git a/src/main/antlr/annis/AqlParser.g4 b/src/main/antlr/annis/AqlParser.g4
index f26eb9b..5718a77 100644
--- a/src/main/antlr/annis/AqlParser.g4
+++ b/src/main/antlr/annis/AqlParser.g4
@@ -191,5 +191,5 @@
exprTop
-: andTopExpr (OR andTopExpr)* # OrTop
+: (BRACE_OPEN andTopExpr (OR andTopExpr)* BRACE_CLOSE | andTopExpr (OR andTopExpr)*) # OrTop // braces around the OR group must come as a balanced pair or be absent
;
\ No newline at end of file
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/annis/BooleanGroupTest.java b/src/test/java/de/ids_mannheim/korap/query/test/annis/BooleanGroupTest.java
new file mode 100644
index 0000000..3fe8408
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/query/test/annis/BooleanGroupTest.java
@@ -0,0 +1,134 @@
+package de.ids_mannheim.korap.query.test.annis;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import de.ids_mannheim.korap.query.serialize.QuerySerializer;
+
+public class BooleanGroupTest {
+
+ private QuerySerializer qs = new QuerySerializer();
+ private ObjectMapper mapper = new ObjectMapper();
+
+ private JsonNode runQuery (String query)
+ throws JsonProcessingException, JsonMappingException {
+ qs.setQuery(query, "annis");
+ JsonNode result = mapper.readTree(qs.toJSON());
+// System.out.println(result.toPrettyString());
+ return result;
+ }
+
+
+ @Test
+ public void testEmptyGroup () throws Exception {
+ JsonNode n = runQuery("/()/");
+ assertEquals("koral:token", n.at("/query/@type").asText());
+ assertEquals("koral:term", n.at("/query/wrap/@type").asText());
+ assertEquals("match:eq", n.at("/query/wrap/match").asText());
+ assertEquals("type:regex", n.at("/query/wrap/type").asText());
+ assertEquals("orth", n.at("/query/wrap/layer").asText());
+ assertEquals("()", n.at("/query/wrap/key").asText());
+ }
+
+ @Test
+ public void testGroupInRegex () throws Exception {
+ JsonNode n = runQuery("/(Kat.*)/");
+ assertEquals("koral:token", n.at("/query/@type").asText());
+ assertEquals("koral:term", n.at("/query/wrap/@type").asText());
+ assertEquals("match:eq", n.at("/query/wrap/match").asText());
+ assertEquals("type:regex", n.at("/query/wrap/type").asText());
+ assertEquals("orth", n.at("/query/wrap/layer").asText());
+ assertEquals("(Kat.*)", n.at("/query/wrap/key").asText());
+ }
+
+ @Test
+ public void testOrGroup () throws Exception {
+ JsonNode n = runQuery("(cat=\"S\" | cat=\"NP\")");
+ assertEquals("koral:group", n.at("/query/@type").asText());
+ assertEquals("operation:disjunction", n.at("/query/operation").asText());
+ assertEquals("koral:span", n.at("/query/operands/0/@type").asText());
+ assertEquals("koral:term", n.at("/query/operands/0/wrap/@type").asText());
+ assertEquals("match:eq", n.at("/query/operands/0/wrap/match").asText());
+ assertEquals("c", n.at("/query/operands/0/wrap/layer").asText());
+ assertEquals("S", n.at("/query/operands/0/wrap/key").asText());
+ assertEquals("c", n.at("/query/operands/1/wrap/layer").asText());
+ assertEquals("NP", n.at("/query/operands/1/wrap/key").asText());
+ }
+
+ @Test
+ public void testOrGroupRegex () throws Exception {
+ JsonNode n = runQuery("/(be|have)/");
+ assertEquals("koral:token", n.at("/query/@type").asText());
+ assertEquals("koral:term", n.at("/query/wrap/@type").asText());
+ assertEquals("match:eq", n.at("/query/wrap/match").asText());
+ assertEquals("type:regex", n.at("/query/wrap/type").asText());
+ assertEquals("orth", n.at("/query/wrap/layer").asText());
+ assertEquals("(be|have)", n.at("/query/wrap/key").asText());
+ }
+
+ @Test
+ public void testLemmaOrGroupRegex () throws Exception {
+ JsonNode n = runQuery("lemma=/(be|have)/");
+ assertEquals("koral:token", n.at("/query/@type").asText());
+ assertEquals("koral:term", n.at("/query/wrap/@type").asText());
+ assertEquals("match:eq", n.at("/query/wrap/match").asText());
+ assertEquals("type:regex", n.at("/query/wrap/type").asText());
+ assertEquals("l", n.at("/query/wrap/layer").asText());
+ assertEquals("(be|have)", n.at("/query/wrap/key").asText());
+ }
+
+
+ @Test
+ public void testAndGroup () throws Exception {
+ runQuery("tok=\"Katze\" & pos=\"N\"");
+
+ // EM: Not sure what the expected result should be.
+ // Koral generates KQ only for the last operand:
+ /*
+ {
+ "query" : {
+ "@type" : "koral:token",
+ "wrap" : {
+ "@type" : "koral:term",
+ "match" : "match:eq",
+ "layer" : "p",
+ "key" : "N"
+ }
+ },
+ "@context" : "http://korap.ids-mannheim.de/ns/koral/0.3/context.jsonld"
+ }
+ */
+ }
+
+ @Test
+ public void testRegexInGroup () throws Exception {
+ JsonNode n = runQuery("(/Kat.*/)");
+
+ assertEquals("koral:token", n.at("/query/@type").asText());
+ assertEquals("koral:term", n.at("/query/wrap/@type").asText());
+ assertEquals("match:eq", n.at("/query/wrap/match").asText());
+ assertEquals("type:regex", n.at("/query/wrap/type").asText());
+ assertEquals("orth", n.at("/query/wrap/layer").asText());
+ assertEquals("Kat.*", n.at("/query/wrap/key").asText());
+ }
+
+ @Test
+ public void testNestedGroupRegex () throws Exception {
+ JsonNode n =runQuery("(/(/Kat.*/)/)");
+ assertEquals("koral:token", n.at("/query/@type").asText());
+ assertEquals("koral:term", n.at("/query/wrap/@type").asText());
+ assertEquals("match:eq", n.at("/query/wrap/match").asText());
+ assertEquals("type:regex", n.at("/query/wrap/type").asText());
+ assertEquals("orth", n.at("/query/wrap/layer").asText());
+ assertEquals("(Kat.*)", n.at("/query/wrap/key").asText());
+
+ // EM: I think the nested slashes should not be necessary here.
+ // Please see the RE_group fragment rule in AqlLexer.g4 #85
+ }
+}