re-added CosmasTree (temporarily deleted because maven exlude did not work)
diff --git a/pom.xml b/pom.xml
index a4a0098..f92fff3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -61,7 +61,7 @@
<source>1.7</source>
<target>1.7</target>
<excludes>
- <!--exclude>**/CosmasTree.java</exclude-->
+ <exclude>**/CosmasTree.java</exclude>
</excludes>
</configuration>
</plugin>
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
new file mode 100644
index 0000000..4ea14eb
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
@@ -0,0 +1,347 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.tree.Tree;
+
+import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
+import de.ids_mannheim.korap.query.cosmas2.c2psParser;
+
+/**
+ * Map representation of CosmasII syntax tree as returned by ANTLR
+ * @author joachim
+ *
+ */
+public class CosmasTree extends AbstractSyntaxTree {
+
+ private static c2psParser cosmasParser;
+ /*
+ * Following collections have the following functions:
+ * - the request is a map with two keys (meta/query): {meta=[], query=[]}
+ * - the query is a list of token group maps: {meta=[], query=[tg1=[], tg2=[]]}
+ * - each token group is a list of tokens: {meta=[], query=[tg1=[t1_1, t1_2], tg2=[t2_1, t2_2, t2_3]]}
+ * - each token corresponds to a single 'fields' linked list {meta=[], query=[tg1=[t1_1=[], t1_2=[]], ... ]}
+ * - each fields list contains a logical operator and 'field maps' defining attributes and values
+ * {meta=[], query=[tg1=[t1_1=[[disj, {base=foo}, {base=bar}]], t1_2=[]], ... ]}
+ */
+ String query;
+ LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> queryMap = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> tokenGroup = new LinkedHashMap<String,Object>();
+ ArrayList<Object> fieldGroup = new ArrayList<Object>();
+ LinkedHashMap<String,Object> fieldMap;
+ ArrayList<List<Object>> distantTokens;
+ /**
+ * Makes it possible to store several distantTokenGroups
+ */
+ LinkedList<ArrayList<List<Object>>> distantTokensStack = new LinkedList<ArrayList<List<Object>>>();
+ /**
+ * Field for repetition query (Kleene + or * operations, or min/max queries: {2,4}
+ */
+ String repetition = "";
+ int tokenCount=0;
+ int tokenGroupCount=0;
+ /**
+ * Keeps track of open node categories
+ */
+ LinkedList<String> openNodeCats = new LinkedList<String>();
+ /**
+ * Global control structure for fieldGroups, keeps track of open fieldGroups.
+ */
+ LinkedList<ArrayList<Object>> openFieldGroups = new LinkedList<ArrayList<Object>>();
+ /**
+ * Global control structure for tokenGroups, keeps track of open tokenGroups.
+ */
+ LinkedList<LinkedHashMap<String,Object>> tokenGroupsStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Flag that indicates whether token fields or meta fields are currently being processed
+ */
+ boolean inMeta = false;
+ boolean negate = false;
+
+ Tree cosmasTree;
+
+ LinkedHashMap<String,Object> treeMap = new LinkedHashMap<String,Object>();
+ /**
+ * Keeps track of all visited nodes in a tree
+ */
+ List<Tree> visited = new ArrayList<Tree>();
+
+
+ /**
+ *
+ * @param tree The syntax tree as returned by ANTLR
+ * @param parser The ANTLR parser instance that generated the parse tree
+ */
+ public CosmasTree(String query) {
+ this.query = query;
+ process(query);
+ System.out.println(requestMap);
+ }
+
+ @Override
+ public Map<String, Object> getRequestMap() {
+ return this.requestMap;
+ }
+
+ @Override
+ public void process(String query) {
+ Tree tree = parseCosmasQuery(query);
+ System.out.println("Processing Cosmas");
+ processNode(tree);
+ }
+
+ private void processNode(Tree node) {
+
+ // Top-down processing
+ if (visited.contains(node)) return;
+ else visited.add(node);
+
+
+ String nodeCat = getNodeCat(node);
+ openNodeCats.push(nodeCat);
+
+
+
+ System.out.println(openNodeCats);
+ System.out.println(distantTokensStack);
+
+ /* ***************************************
+ * Processing individual node categories *
+ *****************************************/
+ // C2QP is tree root
+ if (nodeCat.equals("C2PQ")) {
+ queryMap = new LinkedHashMap<String,Object>();
+ requestMap.put("query", queryMap);
+ }
+
+ // Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry
+ if (nodeCat.equals("OPWF") || nodeCat.equals("OPLEM") || nodeCat.equals("OPMORPH")) {
+
+ if (tokenGroupsStack.isEmpty()) {
+ tokenGroup = new LinkedHashMap<String, Object>();
+ tokenCount=0;
+ tokenGroupCount++;
+ queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ tokenGroupsStack.push(tokenGroup);
+ } else {
+ tokenGroup = tokenGroupsStack.getFirst();
+ }
+
+ // check if this token comes after a distant operator (like "/+w3:4") and if yes,
+ // insert the empty tokenGroups before the current token
+ if (openNodeCats.get(1).equals("ARG2")) {
+ if (openNodeCats.get(2).equals("OPPROX") && !distantTokensStack.isEmpty()) {
+ for (List<Object> distantTokenGroup : distantTokensStack.pop()) {
+// if (tokenGroupsStack.isEmpty()) {
+// queryMap.put("token"+tokenGroupCount+"_1", distantTokenGroup);
+// } else {
+ tokenCount++;
+ tokenGroupsStack.getFirst().put("token"+tokenGroupCount+"_"+tokenCount, distantTokenGroup);
+// }
+// tokenGroupCount++;
+ }
+ }
+ // check negation of token by preceding OPNOT
+// else if (openNodeCats.get(2).equals("OPNOT")) {
+// negate = true;
+// }
+ }
+
+ fieldGroup = new ArrayList<Object>();
+ tokenCount++;
+ tokenGroup.put("token"+tokenGroupCount+"_"+tokenCount, fieldGroup);
+
+ fieldMap = new LinkedHashMap<String, Object>();
+ fieldGroup.add(fieldMap);
+
+ // make category-specific fieldMap entry
+ if (nodeCat.equals("OPWF")) {
+ fieldMap.put("form", node.getChild(0).toStringTree());
+ }
+ if (nodeCat.equals("OPLEM")) {
+ fieldMap.put("lemma", node.getChild(0).toStringTree());
+ }
+ if (nodeCat.equals("OPMORPH")) {
+ fieldMap.put("morph", node.toStringTree());
+ //TODO decompose morphology query
+ }
+ // negate field (see above)
+ if (negate) {
+ fieldMap.put("relation", "!=");
+ }
+
+// tokenGroupsStack.push(tokenGroup);
+ }
+
+ // negate every token that's under OPNOT > ARG2
+ if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
+ negate = true;
+ }
+
+ if (nodeCat.equals("OPOR")) {
+ tokenGroup = new LinkedHashMap<String, Object>();
+ tokenCount=0;
+ tokenGroupCount++;
+ if (tokenGroupsStack.isEmpty()) {
+ queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ } else {
+ tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
+ }
+ tokenGroup.put("type", "disj");
+ tokenGroupsStack.push(tokenGroup);
+ }
+
+ if (nodeCat.equals("OPAND")) {
+ tokenGroup = new LinkedHashMap<String, Object>();
+ tokenCount=0;
+ tokenGroupCount++;
+ if (tokenGroupsStack.isEmpty()) {
+ queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ } else {
+ tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
+ }
+ tokenGroup.put("type", "conj");
+ tokenGroupsStack.push(tokenGroup);
+ }
+
+ if (nodeCat.equals("OPPROX")) {
+ distantTokens = new ArrayList<List<Object>>();
+ Tree prox_opts = node.getChild(0);
+ Tree typ = prox_opts.getChild(0);
+ System.err.println(typ.getChild(0).toStringTree());
+ Tree dist_list = prox_opts.getChild(1);
+ // get relevant information
+ String direction = dist_list.getChild(0).getChild(0).getChild(0).toStringTree();
+ String min = dist_list.getChild(0).getChild(1).getChild(0).toStringTree();
+ String max = dist_list.getChild(0).getChild(1).getChild(1).toStringTree();
+ if (min.equals("VAL0")) {
+ min=max;
+ }
+ // create empty tokens and put them on the stack to place them between arg1 and arg2
+ for (int i=0; i<Integer.parseInt(max)-1; i++) {
+ ArrayList<Object> emptyToken = new ArrayList<Object>();
+ LinkedHashMap<String,Object> emptyFieldMap = new LinkedHashMap<String,Object>();
+ emptyToken.add(emptyFieldMap);
+ tokenGroup.put("token"+tokenGroupCount+"_1", emptyToken);
+ // mark all tokens between min and max optional
+ if (i>=Integer.parseInt(min)) {
+ emptyFieldMap.put("optional", "true");
+ }
+ distantTokens.add(emptyToken);
+ }
+ distantTokensStack.push(distantTokens);
+ }
+
+
+// System.err.println(tokenGroupsStack.size()+" "+tokenGroupsStack);
+ // recursion until 'query' node (root of tree) is processed
+ for (int i=0; i<node.getChildCount(); i++) {
+ Tree child = node.getChild(i);
+ processNode(child);
+ }
+
+ if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
+ negate = false;
+ }
+
+ if (nodeCat.equals("OPAND") || nodeCat.equals("OPOR")) {
+ tokenGroupsStack.pop();
+// tokenGroupCount--;
+// tokenCount=0;
+ }
+
+ openNodeCats.pop();
+
+ }
+
+ /**
+ * Returns the category (or 'label') of the root of a ParseTree.
+ * @param node
+ * @return
+ */
+ public String getNodeCat(Tree node) {
+ String nodeCat = node.toStringTree();
+ Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+ Matcher m = p.matcher(node.toStringTree());
+ if (m.find()) {
+ nodeCat = m.group(1);
+ }
+ return nodeCat;
+ }
+
+ private static Tree parseCosmasQuery(String p) {
+ Tree tree = null;
+ ANTLRStringStream
+ ss = new ANTLRStringStream(p);
+ c2psLexer
+ lex = new c2psLexer(ss);
+ org.antlr.runtime.CommonTokenStream tokens = //v3
+ new org.antlr.runtime.CommonTokenStream(lex);
+ cosmasParser = new c2psParser(tokens);
+ c2psParser.c2ps_query_return
+ c2Return = null;
+ try
+ {
+ c2Return = cosmasParser.c2ps_query(); // statt t().
+ }
+ catch (RecognitionException e)
+ {
+ e.printStackTrace();
+ }
+ // AST Tree anzeigen:
+ tree = (Tree)c2Return.getTree();
+ return tree;
+ }
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ /*
+ * For testing
+ */
+ String[] queries = new String[] {
+ /* COSMAS 2 */
+// "&Mond",
+// "Mond Sterne",
+// "Mond*",
+// "Mond oder Sterne",
+// "(des oder eines) /+w2 (Bauern oder Bauers oder Bauerns)",
+// "(Sonne /+w2 Mond) /+w2:3 Sterne",
+// "Mond oder Sonne /w2 Sterne",
+// "MORPH(V PCP)",
+// "MORPH(V PCP) Baum" ,
+// "Sonne %w2 Mond",
+// "Sonne /w2 Mond",
+// "Sonne nicht (Mond Stern)",
+// "Sonne nicht (Mond oder Stern)",
+// "Sonne /+w1:4 Mond",
+ "(sonne und mond) oder sterne",
+ "(stern oder (sonne und mond)) und MORPH(V PCP)",
+ "(sonne und (stern oder mond)) /+w2 luna???",
+ "(Tag /+w2 $offenen) /+w1 Tür",
+ "heißt /+w2 \"und\" ,"
+ };
+ for (String q : queries) {
+ try {
+ System.out.println(q);
+ System.out.println(parseCosmasQuery(q).toStringTree());
+ CosmasTree act = new CosmasTree(q);
+ System.out.println();
+
+ } catch (NullPointerException npe) {
+ npe.printStackTrace();
+ System.out.println("null\n");
+ }
+ }
+ }
+}
\ No newline at end of file