collection queries: regex, groups
diff --git a/src/main/antlr/CollectionQuery.g4 b/src/main/antlr/CollectionQuery.g4
index 0bd7fe5..7aad659 100644
--- a/src/main/antlr/CollectionQuery.g4
+++ b/src/main/antlr/CollectionQuery.g4
@@ -20,9 +20,9 @@
 NE					: '!=';
 AND					: '&' | 'AND' | 'and' | 'UND' | 'und' ;
 OR					: '|' | 'OR' | 'or' | 'ODER' | 'oder' ;
-
+QMARK				: '?';
+SLASH				: '/';
 WS 					: ( ' ' | '\t' | '\r' | '\n' )+ -> skip ;
-fragment FOCC       : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';
 fragment NO_RE      : ~[ \t\/];
 fragment ALPHABET   : ~('\t' | ' ' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
                     | '(' | ')' | '|' | '"' | ',' | ':' | '\'' | '\\' | '!' | '=' | '~' | '&' | '^' | '<' | '>' );
@@ -34,19 +34,20 @@
 WORD                : ALPHABET+;
 
 /*
- * Regular expressions (delimited by slashes in Annis)
+ * Regular expressions
  */
+fragment FOCC	     : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';
 fragment RE_char     : ~('*' | '?' | '+' | '{' | '}' | '[' | ']' | '/'
          	            | '(' | ')' | '|' | '"' | ':' | '\'' | '\\');
 fragment RE_alter    : ((RE_char | ('(' REGEX ')') | RE_chgroup) '|' REGEX )+;
 fragment RE_chgroup  : '[' RE_char+ ']';
+fragment RE_quant	 : (RE_star | RE_plus | RE_occ) QMARK?;
 fragment RE_opt      : (RE_char | RE_chgroup | ( '(' REGEX ')')) '?';
 fragment RE_star     : (RE_char | RE_chgroup | ( '(' REGEX ')')) '*';
 fragment RE_plus     : (RE_char | RE_chgroup | ( '(' REGEX ')')) '+';
 fragment RE_occ      : (RE_char | RE_chgroup | ( '(' REGEX ')')) FOCC;
 fragment RE_group    : '(' REGEX ')';
-SLASH				 : '/';
-REGEX     		     : SLASH ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_star | RE_plus | RE_occ | RE_group)* SLASH;
+REGEX     		     : SLASH ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)* SLASH;
 
 /*
  * PARSER SECTION
@@ -67,12 +68,17 @@
 ;
 	
 field
-:	WORD;
+:	WORD
+;
 	
 value
-:	WORD | NUMBER | '"' (WORD ws*)+'"'
-| 	regex;
-	
+: WORD 
+| NUMBER 
+| '"' (WORD ws*)+'"'
+| regex
+;
+
+/*	
 andGroup
 : 	(((LRB exprGroup RRB)|expr) AND)+ ((LRB exprGroup RRB)|expr)
 ;
@@ -80,13 +86,18 @@
 orGroup
 : 	(((LRB exprGroup RRB)|expr) OR)+ ((LRB exprGroup RRB)|expr)
 ;
+*/
+
+relation
+:	(expr|exprGroup) conj (expr|exprGroup|relation)
+; 
 
 exprGroup
-:	andGroup
-|	orGroup
+:	LRB (expr | exprGroup | relation) RRB
 ;
 
 start
-:	expr 
-|	exprGroup EOF
+:	( expr EOF          // every alternative must consume the whole input; without EOF trailing tokens after a simple expr were silently ignored
+	| exprGroup EOF 
+	| relation EOF ) 
 ;
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java
index 588f1a6..e47c7e5 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryTree.java
@@ -10,7 +10,7 @@
 import java.util.*;
 
 /**
- * @author hanl
+ * @author hanl, bingel
  * @date 06/12/2013
  */
 public class CollectionQueryTree extends Antlr4AbstractSyntaxTree {
@@ -44,7 +44,7 @@
         } else {
             throw new NullPointerException("Parser has not been instantiated!");
         }
-
+        requestMap.put("@type", "korap:filter");
         System.out.println("Processing collection query");
         if (verbose) System.out.println(tree.toStringTree(parser));
         processNode(tree);
@@ -70,19 +70,20 @@
 		 ****************************************************************
 		 */
 
-        if (nodeCat.equals("andGroup")) {
-            LinkedHashMap<String, Object> exprGroup = makeTermGroup("and");
-            objectStack.push(exprGroup);
+        if (nodeCat.equals("relation")) { // a relation node (operand conj operand) becomes one docGroup
+        	String operator = Arrays.asList("&", "AND", "and", "UND", "und").contains(node.getChild(1).getChild(0).toStringTree(parser)) ? "and" : "or"; // the AND token also has word spellings, not only "&"
+            LinkedHashMap<String, Object> relationGroup = makeDocGroup(operator);
+            putIntoSuperObject(relationGroup); // attached to the enclosing object before being pushed onto the stack
+            objectStack.push(relationGroup);
             stackedObjects++;
-            putIntoSuperObject(exprGroup, 1);
         }
 
-        if (nodeCat.equals("orGroup")) {
-            LinkedHashMap<String, Object> exprGroup = makeTermGroup("or");
-            objectStack.push(exprGroup);
-            stackedObjects++;
-            putIntoSuperObject(exprGroup, 1);
-        }
+//        if (nodeCat.equals("orGroup")) {
+//            LinkedHashMap<String, Object> exprGroup = makeDocGroup("or");
+//            putIntoSuperObject(exprGroup);
+//            objectStack.push(exprGroup);
+//            stackedObjects++;
+//        }
 
         if (nodeCat.equals("expr")) {
             ParseTree fieldNode = getFirstChildWithCat(node, "field");
@@ -91,26 +92,26 @@
             List<ParseTree> valueNodes = getChildrenWithCat(node, "value");
 
             if (valueNodes.size() == 1) {
-                LinkedHashMap<String, Object> term = makeTerm();
-                term.put("attribute", "korap:field#" + field);
-                term.put("key", valueNodes.get(0).getChild(0).toStringTree(parser));
+                LinkedHashMap<String, Object> term = makeDoc();
+                term.put("key", field);
+                term.putAll(parseValue(valueNodes.get(0)));
                 String match = operatorNodes.get(0).getChild(0).toStringTree(parser);
                 term.put("match", "match:" + interpretMatch(match));
                 putIntoSuperObject(term);
             } else { // (valueNodes.size()==2)
-                LinkedHashMap<String, Object> termGroup = makeTermGroup("and");
+                LinkedHashMap<String, Object> termGroup = makeDocGroup("and");
                 ArrayList<Object> termGroupOperands = (ArrayList<Object>) termGroup.get("operands");
 
-                LinkedHashMap<String, Object> term1 = makeTerm();
-                term1.put("attribute", "korap:field#" + field);
-                term1.put("key", valueNodes.get(0).getChild(0).toStringTree(parser));
+                LinkedHashMap<String, Object> term1 = makeDoc();
+                term1.put("key", field);
+                term1.putAll(parseValue(valueNodes.get(0)));
                 String match1 = operatorNodes.get(0).getChild(0).toStringTree(parser);
                 term1.put("match", "match:" + invertInequation(interpretMatch(match1)));
                 termGroupOperands.add(term1);
 
-                LinkedHashMap<String, Object> term2 = makeTerm();
-                term2.put("attribute", "korap:field#" + field);
-                term2.put("key", valueNodes.get(1).getChild(0).toStringTree(parser));
+                LinkedHashMap<String, Object> term2 = makeDoc();
+                term2.put("key", field);
+                term2.putAll(parseValue(valueNodes.get(1)));
                 String match2 = operatorNodes.get(1).getChild(0).toStringTree(parser);
                 term2.put("match", "match:" + interpretMatch(match2));
                 termGroupOperands.add(term2);
@@ -139,7 +140,8 @@
 		 **************************************************************
 		 */
         if (!objectsToPop.isEmpty()) {
-            for (int i = 0; i < objectsToPop.pop(); i++) {
+        	int toPop = objectsToPop.pop();
+            for (int i = 0; i < toPop; i++) {
                 objectStack.pop();
             }
         }
@@ -149,7 +151,19 @@
     }
 
 
-    private String interpretMatch(String match) {
+    private LinkedHashMap<String, Object> parseValue(ParseTree valueNode) { // builds the "value" (and, for regexes, "type") entries of a korap:doc from a `value` parse node
+    	LinkedHashMap<String, Object> map = new LinkedHashMap<String, Object>();
+    	if (getNodeCat(valueNode.getChild(0)).equals("regex")) {
+    		String regex = valueNode.getChild(0).getChild(0).toStringTree(parser); // first child of the regex node, rendered as text
+    		map.put("value", regex.substring(1, regex.length()-1)); // drops the first and last character (presumably the '/' delimiters of the REGEX token)
+    		map.put("type", "type:regex");
+    	} else {
+    		map.put("value", valueNode.getChild(0).toStringTree(parser)); // NOTE(review): only child 0 is used; for the quoted alternative of `value` that is the opening '"' token - confirm this is intended
+    	}
+		return map;
+	}
+
+	private String interpretMatch(String match) {
         String out = null;
         switch (match) {
             case "<":
@@ -201,10 +215,10 @@
     private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) {
         if (objectStack.size() > objStackPosition) {
             ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
-            topObjectOperands.add(0, object);
+            topObjectOperands.add(object);
         } else {
             // I want the raw object, not a wrapped
-            requestMap.put("query", object);
+            requestMap.put("filter", object);
         }
     }
 
@@ -226,6 +240,7 @@
             // Get starting rule from parser
             Method startRule = CollectionQueryParser.class.getMethod("start");
             tree = (ParserRuleContext) startRule.invoke(parser, (Object[]) null);
+            System.out.println(tree.toStringTree(parser));
 
         }
         // Some things went wrong ...
@@ -244,8 +259,10 @@
         query = "(1990<year<2010&genre=Sport)|textClass=politk";
         query = "(textClass=wissenschaft & textClass=politik) | textClass=ausland";
         query = "1990<year<2010 & genre=Sport";
+        query = "(textClass=Sport | textClass=ausland) & corpusID=WPD";
+        query = "textClass=Sport";
         CollectionQueryTree filter = new CollectionQueryTree();
-//    	filter.verbose = true;
+    	filter.verbose = true;
         try {
             filter.process(query);
         } catch (QueryException e) {
diff --git a/src/test/java/CollectionQueryTreeTest.java b/src/test/java/CollectionQueryTreeTest.java
index 324a0f8..a367c26 100644
--- a/src/test/java/CollectionQueryTreeTest.java
+++ b/src/test/java/CollectionQueryTreeTest.java
@@ -1,3 +1,4 @@
+import static org.junit.Assert.*;
 import de.ids_mannheim.korap.query.serialize.CollectionQueryBuilder;
 import de.ids_mannheim.korap.query.serialize.CollectionQueryBuilder2;
 import de.ids_mannheim.korap.query.serialize.CollectionQueryTree;
@@ -9,77 +10,225 @@
 
 public class CollectionQueryTreeTest {
 
-    CollectionQueryTree ef;
-    String map;
-    private String query;
+	CollectionQueryTree cqt;
+	String map;
+	private String query;
+	private String expected;
 
-    private boolean equalsQueryContent(String res, String query) throws QueryException {
-        res = res.replaceAll(" ", "");
-        ef = new CollectionQueryTree();
-        ef.process(query);
-        String queryMap = ef.getRequestMap().get("query").toString().replaceAll(" ", "");
-        return res.equals(queryMap);
-    }
+	private boolean equalsQueryContent(String res, String query) throws QueryException { // true if res equals the serialized filter of query, ignoring spaces
+		res = res.replaceAll(" ", "");
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		String queryMap = cqt.getRequestMap().get("filter").toString().replaceAll(" ", ""); // the serializer now stores the tree under "filter"; "query" is no longer set and would yield null
+		return res.equals(queryMap);
+	}
 
-    @Test
-    public void testSimple() throws QueryException {
-        query = "textClass=Sport";
-        String regex1 = "{@type=korap:filter, filter={@type=korap:term, attribute=textClass, key=Sport, match=match:eq}}";
-        ef = new CollectionQueryTree();
-        ef.process(query);
-        map = JsonUtils.toJSON(ef.getRequestMap());
-//		assertEquals(regex1.replaceAll(" ", ""), map.replaceAll(" ", ""));
-        System.out.println("THE QUERY: " + map);
-    }
+	@Test
+	public void testSimple() throws QueryException {
+		query = "textClass=Sport";
+		// A plain field=value query is expected to serialize to a single korap:doc with match:eq.
+		expected = "{@type=korap:filter, filter={@type=korap:doc, key=textClass, value=Sport, match=match:eq}}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+		query = "textClass!=Sport";
+		// The != operator is expected to yield the same document with match:ne.
+		expected = "{@type=korap:filter, filter={@type=korap:doc, key=textClass, value=Sport, match=match:ne}}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
+	
+	@Test
+	public void testTwoConjuncts() throws QueryException {
+		query = "textClass=Sport & year=2014";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:and, operands=[" +
+					"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+					"{@type=korap:doc, key=year, value=2014, match=match:eq}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
+	
+	@Test
+	public void testThreeConjuncts() throws QueryException {
+		query = "textClass=Sport & year=2014 & corpusID=WPD";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:and, operands=[" +
+					"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+					"{@type=korap:docGroup, relation=relation:and, operands=[" +
+						"{@type=korap:doc, key=year, value=2014, match=match:eq}," +
+						"{@type=korap:doc, key=corpusID, value=WPD, match=match:eq}" +
+					"]}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
+	
+
+	@Test
+	public void testTwoDisjuncts() throws QueryException {
+		query = "textClass=Sport | year=2014";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:or, operands=[" +
+					"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+					"{@type=korap:doc, key=year, value=2014, match=match:eq}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
+	
+	@Test
+	public void testThreeDisjuncts() throws QueryException {
+		query = "textClass=Sport | year=2014 | corpusID=WPD";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:or, operands=[" +
+					"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+					"{@type=korap:docGroup, relation=relation:or, operands=[" +
+						"{@type=korap:doc, key=year, value=2014, match=match:eq}," +
+						"{@type=korap:doc, key=corpusID, value=WPD, match=match:eq}" +
+					"]}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
 
 
-    @Test
-    public void testComplex() throws QueryException {
-        query = "(textClass=Sport | textClass=ausland) & corpusID=WPD";
-        String regex1 = "{@type=korap:filter, filter={@type=korap:term, attribute=textClass, key=Sport, match=match:eq}}";
-        ef = new CollectionQueryTree();
-        ef.process(query);
-        map = JsonUtils.toJSON(ef.getRequestMap());
-//		assertEquals(regex1.replaceAll(" ", ""), map.replaceAll(" ", ""));
-        System.out.println("THE QUERY 1: " + map);
-    }
+	@Test
+	public void testMixed() throws QueryException {
+		query = "(textClass=Sport | textClass=ausland) & corpusID=WPD";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:and, operands=[" +
+					"{@type=korap:docGroup, relation=relation:or, operands=[" +
+						"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+						"{@type=korap:doc, key=textClass, value=ausland, match=match:eq}" +
+					"]}," +
+					"{@type=korap:doc, key=corpusID, value=WPD, match=match:eq}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+		
+		query = "(textClass=Sport & textClass=ausland) & corpusID=WPD";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:and, operands=[" +
+					"{@type=korap:docGroup, relation=relation:and, operands=[" +
+						"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+						"{@type=korap:doc, key=textClass, value=ausland, match=match:eq}" +
+					"]}," +
+					"{@type=korap:doc, key=corpusID, value=WPD, match=match:eq}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+		
+		query = "(textClass=Sport & textClass=ausland) | (corpusID=WPD & author=White)";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:or, operands=[" +
+					"{@type=korap:docGroup, relation=relation:and, operands=[" +
+						"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+						"{@type=korap:doc, key=textClass, value=ausland, match=match:eq}" +
+					"]}," +
+					"{@type=korap:docGroup, relation=relation:and, operands=[" +
+						"{@type=korap:doc, key=corpusID, value=WPD, match=match:eq}," +
+						"{@type=korap:doc, key=author, value=White, match=match:eq}" +
+					"]}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+		
+		query = "(textClass=Sport & textClass=ausland) | (corpusID=WPD & author=White & year=2010)";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:or, operands=[" +
+					"{@type=korap:docGroup, relation=relation:and, operands=[" +
+						"{@type=korap:doc, key=textClass, value=Sport, match=match:eq}," +
+						"{@type=korap:doc, key=textClass, value=ausland, match=match:eq}" +
+					"]}," +
+					"{@type=korap:docGroup, relation=relation:and, operands=[" +
+						"{@type=korap:doc, key=corpusID, value=WPD, match=match:eq}," +
+						"{@type=korap:docGroup, relation=relation:and, operands=[" +
+							"{@type=korap:doc, key=author, value=White, match=match:eq}," +
+							"{@type=korap:doc, key=year, value=2010, match=match:eq}" +
+						"]}" +
+					"]}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
 
-    @Test
-    public void testBuilder() throws QueryException {
-        CollectionQueryBuilder2 builder = new CollectionQueryBuilder2();
-        builder.setQuery("(textClass=Sport | textClass=ausland) & corpusID=WPD");
-        System.out.println("BUILDER RESULT: " + builder.toJSON());
-    }
+	@Test
+	public void testDate() throws QueryException {
+		// search for pubDate between 1990 and 2010!
+		query = "1990<pubDate<2010";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:docGroup, relation=relation:and, operands=[" +
+					"{@type=korap:doc, key=pubDate, value=1990, match=match:gt}," +
+					"{@type=korap:doc, key=pubDate, value=2010, match=match:lt}" +
+				"]}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+		
+		query = "pubDate>=1990";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:doc, key=pubDate, value=1990, match=match:geq}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
 
-//    @Test
-    public void testSimpleBuilder() {
-        CollectionQueryBuilder b = new CollectionQueryBuilder();
-        b.addMetaFilter("corpusID", "WPD");
-        b.addMetaFilter("textClass", "wissenschaft");
-        b.setFilterAttributeRelation(Relation.AND);
-        System.out.println("SIMPLE BUILDER RESULT: " + b.toCollections());
-    }
-
-    // old builder pubDate query
-//    @Test
-    public void testDateQuery() {
-        CollectionQueryBuilder b = new CollectionQueryBuilder();
-        String query = "pubDate=>" + TimeUtils.getNow().getMillis();
-        query = query + " AND pubDate=<" + TimeUtils.getNow().getMillis();
-        b.addMetaFilterQuery(query);
-        b.setFilterAttributeRelation(Relation.AND);
-        System.out.println("FINAL RESOURCE: " + b.toCollections());
-    }
-
-    @Test
-    public void testDateNewQuery() throws QueryException {
-        // search for pubDate between 1990 and 2010!
-        String query = "1990<pubDate<2010 & genre=Sport";
-        CollectionQueryBuilder2 q = new CollectionQueryBuilder2();
-        q.setQuery(query);
-        System.out.println("DATE QUERY RESULT: " + q.toJSON());
-    }
-
+	@Test
+	public void testRegex() throws QueryException {
+		query = "author=/Go.*he/";
+		expected = 
+			"{@type=korap:filter, filter=" +
+				"{@type=korap:doc, key=author, value=Go.*he, type=type:regex, match=match:eq}" +
+			"}";
+		cqt = new CollectionQueryTree();
+		cqt.process(query);
+		map = cqt.getRequestMap().toString();
+		assertEquals(expected.replaceAll(" ", ""), map.replaceAll(" ", ""));
+	}
 
 }