moved KorAP-querySerialization
diff --git a/.classpath b/.classpath
new file mode 100644
index 0000000..5051509
--- /dev/null
+++ b/.classpath
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="src" output="target/test-classes" path="src/test/java">
+ <attributes>
+ <attribute name="optional" value="true"/>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+ <attributes>
+ <attribute name="maven.pomderived" value="true"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="lib" path="/home/joachim/korap/query/antlr-3.1.jar"/>
+ <classpathentry combineaccessrules="false" kind="src" path="/poliqarp"/>
+ <classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-mapper-asl/1.9.2/jackson-mapper-asl-1.9.2.jar"/>
+ <classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-core-asl/1.9.2/jackson-core-asl-1.9.2.jar"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/.project b/.project
new file mode 100644
index 0000000..25aeeec
--- /dev/null
+++ b/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>QuerySerialization</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ </natures>
+</projectDescription>
diff --git a/notes.txt b/notes.txt
new file mode 100644
index 0000000..58bbc96
--- /dev/null
+++ b/notes.txt
@@ -0,0 +1,6 @@
+COSMAS:
+ - disjunctions with more than 2 arguments are parsed hierarchically by ANTLR grammar:
+ "A oder B oder C" becomes (simplified) "A|(B|C)"
+ - distance operators are a little hard to process, using a stack of "distantTokenGroups" containing empty tokens to keep track of
+ the operators, insert the tokenGroups on top of this stack before second argument
+ - using stacks proves to be a good choice for tree processing
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..ae27c21
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,46 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <groupId>de.ids_mannheim.korap.query</groupId>
+ <artifactId>xerialize</artifactId>
+ <version>0.0.1</version>
+ <packaging>jar</packaging>
+
+ <name>xerialize</name>
+ <url>http://maven.apache.org</url>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr4</artifactId>
+ <version>4.1</version>
+ <classifier>complete</classifier>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <sourceDirectory>${basedir}/src/main/java</sourceDirectory>
+ <outputDirectory>${basedir}/bin</outputDirectory>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.3.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
new file mode 100644
index 0000000..5876d41
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
@@ -0,0 +1,13 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.util.Map;
+
+public abstract class AbstractSyntaxTree {
+
+ String query;
+
+ public abstract Map<String, Object> getRequestMap();
+
+ public abstract void process(String query);
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
new file mode 100644
index 0000000..ee8a5f2
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
@@ -0,0 +1,350 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.tree.Tree;
+
+import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
+import de.ids_mannheim.korap.query.cosmas2.c2psParser;
+
+/**
+ * Map representation of CosmasII syntax tree as returned by ANTLR
+ * @author joachim
+ *
+ */
+public class CosmasTree extends AbstractSyntaxTree {
+
+ private static c2psParser cosmasParser;
+ /*
+ * Following collections have the following functions:
+ * - the request is a map with two keys (meta/query): {meta=[], query=[]}
+ * - the query is a list of token group maps: {meta=[], query=[tg1=[], tg2=[]]}
+ * - each token group is a list of tokens: {meta=[], query=[tg1=[t1_1, t1_2], tg2=[t2_1, t2_2, t2_3]]}
+ * - each token corresponds to a single 'fields' linked list {meta=[], query=[tg1=[t1_1=[], t1_2=[]], ... ]}
+ * - each fields list contains a logical operator and 'field maps' defining attributes and values
+ * {meta=[], query=[tg1=[t1_1=[[disj, {base=foo}, {base=bar}]], t1_2=[]], ... ]}
+ */
+ String query;
+ LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> queryMap = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> tokenGroup = new LinkedHashMap<String,Object>();
+ ArrayList<Object> fieldGroup = new ArrayList<Object>();
+ LinkedHashMap<String,Object> fieldMap;
+ ArrayList<List<Object>> distantTokens;
+ /**
+ * Makes it possible to store several distantTokenGroups
+ */
+ LinkedList<ArrayList<List<Object>>> distantTokensStack = new LinkedList<ArrayList<List<Object>>>();
+ /**
+ * Field for repetition query (Kleene + or * operations, or min/max queries: {2,4}
+ */
+ String repetition = "";
+ int tokenCount=0;
+ int tokenGroupCount=0;
+ /**
+ * Keeps track of open node categories
+ */
+ LinkedList<String> openNodeCats = new LinkedList<String>();
+ /**
+ * Global control structure for fieldGroups, keeps track of open fieldGroups.
+ */
+ LinkedList<ArrayList<Object>> openFieldGroups = new LinkedList<ArrayList<Object>>();
+ /**
+ * Global control structure for tokenGroups, keeps track of open tokenGroups.
+ */
+ LinkedList<LinkedHashMap<String,Object>> tokenGroupsStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Flag that indicates whether token fields or meta fields are currently being processed
+ */
+ boolean inMeta = false;
+ boolean negate = false;
+
+ Tree cosmasTree;
+
+ LinkedHashMap<String,Object> treeMap = new LinkedHashMap<String,Object>();
+ /**
+ * Keeps track of all visited nodes in a tree
+ */
+ List<Tree> visited = new ArrayList<Tree>();
+
+
+ /**
+ *
+ * @param tree The syntax tree as returned by ANTLR
+ * @param parser The ANTLR parser instance that generated the parse tree
+ */
+ public CosmasTree(String query) {
+ this.query = query;
+ process(query);
+ System.out.println(requestMap);
+ }
+
+ @Override
+ public Map<String, Object> getRequestMap() {
+ return this.requestMap;
+ }
+
+ @Override
+ public void process(String query) {
+ Tree tree = parseCosmasQuery(query);
+ System.out.println("Processing Cosmas");
+ processNode(tree);
+ }
+
+ private void processNode(Tree node) {
+
+ // Top-down processing
+ if (visited.contains(node)) return;
+ else visited.add(node);
+
+
+ String nodeCat = getNodeCat(node);
+ openNodeCats.push(nodeCat);
+
+
+
+ System.out.println(openNodeCats);
+ System.out.println(distantTokensStack);
+
+ /* ***************************************
+ * Processing individual node categories *
+ *****************************************/
+ // C2QP is tree root
+ if (nodeCat.equals("C2PQ")) {
+ queryMap = new LinkedHashMap<String,Object>();
+ requestMap.put("query", queryMap);
+ }
+
+ // Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry
+ if (nodeCat.equals("OPWF") || nodeCat.equals("OPLEM") || nodeCat.equals("OPMORPH")) {
+
+ if (tokenGroupsStack.isEmpty()) {
+ tokenGroup = new LinkedHashMap<String, Object>();
+ tokenCount=0;
+ tokenGroupCount++;
+ queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ tokenGroupsStack.push(tokenGroup);
+ } else {
+ tokenGroup = tokenGroupsStack.getFirst();
+ }
+
+ // check if this token comes after a distant operator (like "/+w3:4") and if yes,
+ // insert the empty tokenGroups before the current token
+ if (openNodeCats.get(1).equals("ARG2")) {
+ if (openNodeCats.get(2).equals("OPPROX") && !distantTokensStack.isEmpty()) {
+ for (List<Object> distantTokenGroup : distantTokensStack.pop()) {
+// if (tokenGroupsStack.isEmpty()) {
+// queryMap.put("token"+tokenGroupCount+"_1", distantTokenGroup);
+// } else {
+ tokenCount++;
+ tokenGroupsStack.getFirst().put("token"+tokenGroupCount+"_"+tokenCount, distantTokenGroup);
+// }
+// tokenGroupCount++;
+ }
+ }
+ // check negation of token by preceding OPNOT
+// else if (openNodeCats.get(2).equals("OPNOT")) {
+// negate = true;
+// }
+ }
+
+ fieldGroup = new ArrayList<Object>();
+ tokenCount++;
+ tokenGroup.put("token"+tokenGroupCount+"_"+tokenCount, fieldGroup);
+
+ fieldMap = new LinkedHashMap<String, Object>();
+ fieldGroup.add(fieldMap);
+
+ // make category-specific fieldMap entry
+ if (nodeCat.equals("OPWF")) {
+ fieldMap.put("form", node.getChild(0).toStringTree());
+ }
+ if (nodeCat.equals("OPLEM")) {
+ fieldMap.put("lemma", node.getChild(0).toStringTree());
+ }
+ if (nodeCat.equals("OPMORPH")) {
+ fieldMap.put("morph", node.toStringTree());
+ //TODO decompose morphology query
+ }
+ // negate field (see above)
+ if (negate) {
+ fieldMap.put("relation", "!=");
+ }
+
+// tokenGroupsStack.push(tokenGroup);
+ }
+
+ // negate every token that's under OPNOT > ARG2
+ if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
+ negate = true;
+ }
+
+ if (nodeCat.equals("OPOR")) {
+ tokenGroup = new LinkedHashMap<String, Object>();
+ tokenCount=0;
+ tokenGroupCount++;
+ if (tokenGroupsStack.isEmpty()) {
+ queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ } else {
+ tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
+ }
+ tokenGroup.put("type", "disj");
+ tokenGroupsStack.push(tokenGroup);
+ }
+
+ if (nodeCat.equals("OPAND")) {
+ tokenGroup = new LinkedHashMap<String, Object>();
+ tokenCount=0;
+ tokenGroupCount++;
+ if (tokenGroupsStack.isEmpty()) {
+ queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
+ } else {
+ tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
+ }
+ tokenGroup.put("type", "conj");
+ tokenGroupsStack.push(tokenGroup);
+ }
+
+ if (nodeCat.equals("OPPROX")) {
+ distantTokens = new ArrayList<List<Object>>();
+ Tree prox_opts = node.getChild(0);
+ Tree typ = prox_opts.getChild(0);
+ System.err.println(typ.getChild(0).toStringTree());
+ Tree dist_list = prox_opts.getChild(1);
+ // get relevant information
+ String direction = dist_list.getChild(0).getChild(0).getChild(0).toStringTree();
+ String min = dist_list.getChild(0).getChild(1).getChild(0).toStringTree();
+ String max = dist_list.getChild(0).getChild(1).getChild(1).toStringTree();
+ if (min.equals("VAL0")) {
+ min=max;
+ }
+ // create empty tokens and put them on the stack to place them between arg1 and arg2
+ for (int i=0; i<Integer.parseInt(max)-1; i++) {
+ ArrayList<Object> emptyToken = new ArrayList<Object>();
+ LinkedHashMap<String,Object> emptyFieldMap = new LinkedHashMap<String,Object>();
+ emptyToken.add(emptyFieldMap);
+ tokenGroup.put("token"+tokenGroupCount+"_1", emptyToken);
+ // mark all tokens between min and max optional
+ if (i>=Integer.parseInt(min)) {
+ emptyFieldMap.put("optional", "true");
+ }
+ distantTokens.add(emptyToken);
+ }
+ distantTokensStack.push(distantTokens);
+ }
+
+
+// System.err.println(tokenGroupsStack.size()+" "+tokenGroupsStack);
+ // recursion until 'query' node (root of tree) is processed
+ for (int i=0; i<node.getChildCount(); i++) {
+ Tree child = node.getChild(i);
+ processNode(child);
+ }
+
+ if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
+ negate = false;
+ }
+
+ if (nodeCat.equals("OPAND") || nodeCat.equals("OPOR")) {
+ tokenGroupsStack.pop();
+// tokenGroupCount--;
+// tokenCount=0;
+ }
+
+ openNodeCats.pop();
+
+ }
+
+ /**
+ * Returns the category (or 'label') of the root of a ParseTree.
+ * @param node
+ * @return
+ */
+ public String getNodeCat(Tree node) {
+ String nodeCat = node.toStringTree();
+ Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+ Matcher m = p.matcher(node.toStringTree());
+ if (m.find()) {
+ nodeCat = m.group(1);
+ }
+ return nodeCat;
+ }
+
+ private static Tree parseCosmasQuery(String p) {
+ Tree tree = null;
+ ANTLRStringStream
+ ss = new ANTLRStringStream(p);
+ c2psLexer
+ lex = new c2psLexer(ss);
+ org.antlr.runtime.CommonTokenStream tokens = //v3
+ new org.antlr.runtime.CommonTokenStream(lex);
+ cosmasParser = new c2psParser(tokens);
+ c2psParser.c2ps_query_return
+ c2Return = null;
+ try
+ {
+ c2Return = cosmasParser.c2ps_query(); // statt t().
+ }
+ catch (RecognitionException e)
+ {
+ e.printStackTrace();
+ }
+ // AST Tree anzeigen:
+ tree = (Tree)c2Return.getTree();
+ return tree;
+ }
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ /*
+ * For testing
+ */
+ String[] queries = new String[] {
+ /* COSMAS 2 */
+// "&Mond",
+// "Mond Sterne",
+// "Mond*",
+// "Mond oder Sterne",
+// "(des oder eines) /+w2 (Bauern oder Bauers oder Bauerns)",
+// "(Sonne /+w2 Mond) /+w2:3 Sterne",
+// "Mond oder Sonne /w2 Sterne",
+// "MORPH(V PCP)",
+// "MORPH(V PCP) Baum" ,
+// "Sonne %w2 Mond",
+// "Sonne /w2 Mond",
+// "Sonne nicht (Mond Stern)",
+// "Sonne nicht (Mond oder Stern)",
+// "Sonne /+w1:4 Mond",
+ "(sonne und mond) oder sterne",
+ "(stern oder (sonne und mond)) und MORPH(V PCP)",
+ "(sonne und (stern oder mond)) /+w2 luna???",
+ "(Tag /+w2 $offenen) /+w1 Tür",
+ "heißt /+w2 \"und\" ,"
+ };
+ for (String q : queries) {
+ try {
+ System.out.println(q);
+ System.out.println(parseCosmasQuery(q).toStringTree());
+ CosmasTree act = new CosmasTree(q);
+ System.out.println();
+
+ } catch (NullPointerException npe) {
+ npe.printStackTrace();
+ System.out.println("null\n");
+ }
+ }
+ }
+
+
+}
+
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java b/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java
new file mode 100644
index 0000000..9661e5e
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java
@@ -0,0 +1,62 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.codehaus.jackson.JsonGenerationException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+
+public class JsonGenerator {
+
+ ObjectMapper mapper;
+ AbstractSyntaxTree ast;
+
+ public JsonGenerator() {
+ mapper = new ObjectMapper();
+ }
+
+ public void run(String outFile, String query, String queryLanguage) throws JsonGenerationException, JsonMappingException, IOException {
+ if (queryLanguage.equals("poliqarp")) {
+ ast = new PoliqarpTree(query);
+ } else if (queryLanguage.equals("cosmas")) {
+ ast = new CosmasTree(query);
+ }
+ Map<String, Object> requestMap = ast.getRequestMap();
+ mapper.writeValue(new File(outFile), requestMap);
+ }
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ /*
+ * just for testing...
+ */
+ JsonGenerator jg = new JsonGenerator();
+ int i=0;
+ String[] queries = new String[] {
+
+ "[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815",
+ "([base=foo]|[base=foo])[base=foobar]",
+ "[base=foo]([base=foo]|[base=foobar])",
+ };
+ for (String q : queries) {
+ i++;
+ try {
+ System.out.println(q);
+ jg.run(System.getProperty("user.home")+"/test"+i+".json", q, "poliqarp");
+ } catch (NullPointerException npe) {
+ npe.printStackTrace();
+ System.out.println("null\n");
+ } catch (JsonGenerationException e) {
+ e.printStackTrace();
+ } catch (JsonMappingException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
new file mode 100644
index 0000000..2d05d06
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -0,0 +1,537 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.v4.runtime.ANTLRInputStream;
+import org.antlr.v4.runtime.BailErrorStrategy;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.tree.ParseTree;
+
+import de.ids_mannheim.korap.query.poliqarpplus.PoliqarpPlusLexer;
+import de.ids_mannheim.korap.query.poliqarpplus.PoliqarpPlusParser;
+
+/**
+ * Map representation of Poliqarp syntax tree as returned by ANTLR
+ * @author joachim
+ *
+ */
+public class PoliqarpPlusTree extends AbstractSyntaxTree {
+
+ /**
+ * Top-level map representing the whole request.
+ */
+ LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
+ /**
+ * Keeps track of open node categories
+ */
+ LinkedList<String> openNodeCats = new LinkedList<String>();
+ /**
+ * Flag that indicates whether token fields or meta fields are currently being processed
+ */
+ boolean inMeta = false;
+ /**
+ * Flag that indicates whether a cq_segment is to be ignored (e.g. when it is empty, is followed directly by only a spanclass and has no other children etc...).
+ */
+ boolean ignoreCq_segment = false;
+ /**
+ * Parser object deriving the ANTLR parse tree.
+ */
+ static Parser poliqarpParser;
+ /**
+ * Keeps track of all visited nodes in a tree
+ */
+ List<ParseTree> visited = new ArrayList<ParseTree>();
+
+ /**
+ * Keeps track of active fields (like 'base=foo').
+ */
+ LinkedList<ArrayList<Object>> fieldStack = new LinkedList<ArrayList<Object>>();
+ /**
+ * Keeps track of active sequences.
+ */
+ LinkedList<LinkedHashMap<String,Object>> sequenceStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Keeps track of active tokens.
+ */
+ LinkedList<LinkedHashMap<String,Object>> tokenStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Keeps track of sequence/token/field groups.
+ */
+ LinkedList<ArrayList<Object>> groupStack = new LinkedList<ArrayList<Object>>();
+ /**
+ * Marks the currently active object (sequence/token/group...) in order to know where to add stuff like occurrence info etc.
+ */
+ LinkedHashMap<String,Object> curObject = new LinkedHashMap<String,Object>();
+ /**
+ * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
+ */
+ LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
+
+ /**
+ *
+ * @param tree The syntax tree as returned by ANTLR
+ * @param parser The ANTLR parser instance that generated the parse tree
+ */
+ public PoliqarpPlusTree(String query) {
+ this.query = query;
+ prepareContext();
+ process(query);
+ System.out.println(">>> "+requestMap.get("query")+" <<<");
+ }
+
+ private void prepareContext() {
+ LinkedHashMap<String,Object> context = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> operands = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> relation = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> classMap = new LinkedHashMap<String,Object>();
+
+ operands.put("@id", "korap:operands");
+ operands.put("@container", "@list");
+
+ relation.put("@id", "korap:relation");
+ relation.put("@type", "korap:relation#types");
+
+ classMap.put("@id", "korap:class");
+ classMap.put("@type", "xsd:integer");
+
+ context.put("korap", "http://korap.ids-mannheim.de/ns/query");
+ context.put("@language", "de");
+ context.put("operands", operands);
+ context.put("relation", relation);
+ context.put("class", classMap);
+ context.put("query", "korap:query");
+ context.put("filter", "korap:filter");
+ context.put("meta", "korap:meta");
+
+ requestMap.put("@context", context);
+ }
+
+ @Override
+ public Map<String, Object> getRequestMap() {
+ return this.requestMap;
+ }
+
+ @Override
+ public void process(String query) {
+ ParseTree tree = parsePoliqarpQuery(query);
+ System.out.println("Processing Poliqarp");
+ processNode(tree);
+ }
+
+ @SuppressWarnings("unchecked")
+ private void processNode(ParseTree node) {
+ // Top-down processing
+ if (visited.contains(node)) return;
+ else visited.add(node);
+
+ String nodeCat = getNodeCat(node);
+ openNodeCats.push(nodeCat);
+
+// System.out.println(openNodeCats);
+
+ /*
+ ****************************************************************
+ ****************************************************************
+ * Processing individual node categories *
+ ****************************************************************
+ ****************************************************************
+ */
+ if (nodeCat.equals("query")) {
+ }
+
+ // cq_segments/sq_segments: token group
+ if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
+ // disregard empty segments in simple queries (parsed by ANTLR as empty cq_segments)
+ ignoreCq_segment = (node.getChildCount() == 1 && node.getChild(0).toStringTree(poliqarpParser).equals(" ") && getNodeCat(node.getChild(0)).equals("spanclass"));
+// ignoreCq_segment = (node.getChildCount() == 1 && node.getChild(0).toStringTree(poliqarpParser).equals(" ") );
+ if (!ignoreCq_segment) {
+ LinkedHashMap<String,Object> sequence = new LinkedHashMap<String,Object>();
+ curObject = sequence;
+ // Step I: decide type of element (one or more elements? -> token or sequence)
+ if (node.getChildCount()>1) {
+ sequence.put("@type", "korap:sequence");
+ ArrayList<Object> sequenceOperands = new ArrayList<Object>();
+ sequence.put("operands", sequenceOperands);
+ } else {
+ // if only child, make the sequence a mere korap:token
+ sequence.put("@type", "korap:token");
+ tokenStack.push(sequence);
+ }
+ // Step II: decide where to put this element (top query node or embedded in super sequence?)
+ if (openNodeCats.get(1).equals("query")) {
+ requestMap.put("query", sequence);
+ } else if (!groupStack.isEmpty()) {
+ groupStack.getFirst().add(sequence);
+ } else if (openNodeCats.get(1).equals("spanclass")) {
+ System.out.println("TODO!");
+ } else if (!sequenceStack.isEmpty()){
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(sequence);
+ }
+ sequenceStack.push(sequence);
+ }
+ }
+
+ // cq_segment
+ if (nodeCat.equals("cq_segment")) {
+ // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
+ LinkedHashMap<String, Object> token;
+ if (tokenStack.isEmpty()) {
+ token = new LinkedHashMap<String, Object>();
+ tokenStack.push(token);
+ } else {
+ // in case cq_segments has already added the token
+ token = tokenStack.getFirst();
+ }
+ curObject = token;
+ curToken = token;
+
+ // Step II: start filling object and add to containing sequence
+ token.put("@type", "korap:token");
+ // add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
+ if (node.getParent().getChildCount()>1) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(token);
+ }
+ }
+
+ // disjoint cq_segments, like ([base=foo][base=bar])|[base=foobar]
+ if (nodeCat.equals("cq_disj_segments")) {
+ LinkedHashMap<String,Object> disjunction = new LinkedHashMap<String,Object>();
+ curObject = disjunction;
+ ArrayList<Object> disjOperands = new ArrayList<Object>();
+ disjunction.put("@type", "korap:group");
+ disjunction.put("relation", "or");
+ disjunction.put("operands", disjOperands);
+ groupStack.push(disjOperands);
+
+ // decide where to put the disjunction
+ if (openNodeCats.get(1).equals("query")) {
+ requestMap.put("query", disjunction);
+ } else if (openNodeCats.get(1).equals("cq_segments")) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(disjunction);
+ }
+ }
+
+ // field element (outside meta)
+ if (nodeCat.equals("field")) {
+ LinkedHashMap<String,Object> fieldMap = new LinkedHashMap<String,Object>();
+
+ // Step I: extract info
+ String featureName = node.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (field_name base) (field_op !=) (re_query "bar*")
+ String relation = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
+ String value = "";
+ ParseTree valNode = node.getChild(2);
+ String valType = getNodeCat(valNode);
+ fieldMap.put("@type", "korap:term");
+ if (valType.equals("simple_query")) {
+ value = valNode.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (simple_query (sq_segment foo))
+ } else if (valType.equals("re_query")) {
+ value = valNode.getChild(0).toStringTree(poliqarpParser); //e.g. (re_query "bar*")
+ fieldMap.put("@subtype", "korap:value#regex");
+ }
+ fieldMap.put("@value", featureName+":"+value);
+ fieldMap.put("relation", relation);
+
+ // Step II: decide where to put the field map (as the only value of a token or the meta filter or as a part of a group in case of coordinated fields)
+ if (fieldStack.isEmpty()) {
+ if (!inMeta) {
+ tokenStack.getFirst().put("@value", fieldMap);
+ } else {
+ ((HashMap<String, Object>) requestMap.get("meta")).put("@value", fieldMap);
+ }
+ } else {
+ fieldStack.getFirst().add(fieldMap);
+ }
+ visited.add(node.getChild(0));
+ visited.add(node.getChild(1));
+ visited.add(node.getChild(2));
+ }
+
+ // conj_field serves for both conjunctions and disjunctions
+ if (nodeCat.equals("conj_field")) {
+ LinkedHashMap<String,Object> group = new LinkedHashMap<String,Object>();
+ ArrayList<Object> groupOperands = new ArrayList<Object>();
+
+ group.put("@type", "korap:group");
+ group.put("operands", groupOperands);
+ fieldStack.push(groupOperands);
+
+ // Step I: get operator (& or |)
+ ParseTree operatorNode = node.getChild(1).getChild(0);
+ String operator = getNodeCat(operatorNode);
+ if (operator.equals("|")) {
+ group.put("relation", "or");
+ } else if (operator.equals("&")) {
+ group.put("relation", "and");
+ }
+
+ // Step II: decide where to put the group (directly under token or in top meta filter section or embed in super group)
+ if (openNodeCats.get(1).equals("cq_segment")) {
+ tokenStack.getFirst().put("@value", group);
+ } else if (openNodeCats.get(1).equals("meta_field_group")) {
+ ((HashMap<String, Object>) requestMap.get("meta")).put("@value", group);
+ } else {
+ fieldStack.get(1).add(group);
+ }
+ // skip the operator
+ visited.add(node.getChild(1));
+ }
+
+
+ if (nodeCat.equals("sq_segment")) {
+ // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
+ LinkedHashMap<String, Object> token;
+ if (tokenStack.isEmpty()) {
+ token = new LinkedHashMap<String, Object>();
+ tokenStack.push(token);
+ } else {
+ // in case sq_segments has already added the token
+ token = tokenStack.getFirst();
+ }
+ curObject = token;
+ curToken = token;
+ // Step II: fill object (token values) and put into containing sequence
+ token.put("@type", "korap:token");
+ String word = node.getChild(0).toStringTree(poliqarpParser);
+ LinkedHashMap<String,Object> tokenValues = new LinkedHashMap<String,Object>();
+ token.put("@value", tokenValues);
+ tokenValues.put("orth", word);
+ tokenValues.put("relation", "=");
+
+ // add token to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the values from "field")
+ if (node.getParent().getChildCount()>1) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(token);
+ }
+ }
+
+ if (nodeCat.equals("element")) {
+ // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
+ LinkedHashMap<String, Object> elem;
+ if (tokenStack.isEmpty()) {
+ elem = new LinkedHashMap<String, Object>();
+ } else {
+ // in case sq_segments has already added the token
+ elem = tokenStack.getFirst();
+ }
+ curObject = elem;
+ curToken = elem;
+ // Step II: fill object (token values) and put into containing sequence
+ elem.put("@type", "korap:element");
+ String value = node.getChild(1).toStringTree(poliqarpParser);
+ elem.put("@value", value);
+
+ // add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
+// if (node.getParent().getChildCount()>1) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(elem);
+// }
+ }
+
+ if (nodeCat.equals("spanclass")) {
+ LinkedHashMap<String,Object> span = new LinkedHashMap<String,Object>();
+ curObject = span;
+ ArrayList<Object> spanOperands = new ArrayList<Object>();
+ String id = "0";
+ // Step I: get info
+ if (getNodeCat(node.getChild(1)).equals("spanclass_id")) {
+ id = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
+ id = id.substring(0, id.length()-1); // remove trailing colon ':'
+ // only allow class ids up to 255
+ if (Integer.parseInt(id)>255) {
+ id = "0";
+ }
+ }
+
+ span.put("@type", "korap:group");
+ span.put("class", id);
+ span.put("operands", spanOperands);
+ groupStack.push(spanOperands);
+
+ // decide where to put the span
+ // add token to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the values from "field")
+ if (node.getParent().getChildCount()>1) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(span);
+ } else if (openNodeCats.get(2).equals("query")) {
+ requestMap.put("query", span);
+ } else if (!groupStack.isEmpty()) {
+ groupStack.getFirst().add(span);
+ }
+
+ }
+
+ if (nodeCat.equals("position")) {
+ //TODO
+ }
+
+ if (nodeCat.equals("shrink") || nodeCat.equals("split")) {
+ //TODO
+ }
+
+ // repetition of token group
+ if (nodeCat.equals("occ")) {
+ ParseTree occChild = node.getChild(0);
+ String repetition = occChild.toStringTree(poliqarpParser);
+ curObject.put("repetition", repetition);
+ visited.add(occChild);
+ }
+
+ // flags for case sensitivity and whole-word-matching
+ if (nodeCat.equals("flag")) {
+ String flag = getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
+ // add to current token's value
+ ((HashMap<String, Object>) curToken.get("@value")).put("flag", flag);
+ }
+
+ if (nodeCat.equals("meta")) {
+ inMeta=true;
+ LinkedHashMap<String,Object> metaFilter = new LinkedHashMap<String,Object>();
+ requestMap.put("meta", metaFilter);
+ metaFilter.put("@type", "korap:meta");
+ }
+
+
+
+ if (nodeCat.equals("within")) {
+ ParseTree domainNode = node.getChild(2);
+ String domain = getNodeCat(domainNode);
+// queryOperands.add("within:"+domain);
+ curObject.put("within", domain);
+ visited.add(node.getChild(0));
+ visited.add(node.getChild(1));
+ visited.add(domainNode);
+ }
+
+ /*
+ ****************************************************************
+ ****************************************************************
+ * recursion until 'request' node (root of tree) is processed *
+ * **************************************************************
+ ****************************************************************
+ */
+ for (int i=0; i<node.getChildCount(); i++) {
+ ParseTree child = node.getChild(i);
+ processNode(child);
+ }
+
+ // Stuff that happens when leaving a node (taking it off the stack)
+ if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
+ // exclude whitespaces analysed as empty cq_segments
+ if (!ignoreCq_segment) {
+ sequenceStack.pop();
+ }
+ }
+
+ if (nodeCat.equals("cq_disj_segments")) {
+ groupStack.pop();
+ }
+
+ if (nodeCat.equals("cq_segment") || nodeCat.equals("sq_segment")){
+ tokenStack.pop();
+ }
+
+ if (nodeCat.equals("conj_field")) {
+ fieldStack.pop();
+ }
+
+ openNodeCats.pop();
+
+ }
+
+ /**
+ * Returns the category (or 'label') of the root of a ParseTree.
+ * @param node
+ * @return
+ */
+ public String getNodeCat(ParseTree node) {
+ String nodeCat = node.toStringTree(poliqarpParser);
+ Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+ Matcher m = p.matcher(node.toStringTree(poliqarpParser));
+ if (m.find()) {
+ nodeCat = m.group(1);
+ }
+ return nodeCat;
+ }
+
+ private static ParserRuleContext parsePoliqarpQuery (String p) {
+ Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream)null);
+ ParserRuleContext tree = null;
+ // Like p. 111
+ try {
+
+ // Tokenize input data
+ ANTLRInputStream input = new ANTLRInputStream(p);
+ poliqarpLexer.setInputStream(input);
+ CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
+ poliqarpParser = new PoliqarpPlusParser(tokens);
+
+ // Don't throw out erroneous stuff
+ poliqarpParser.setErrorHandler(new BailErrorStrategy());
+ poliqarpParser.removeErrorListeners();
+
+ // Get starting rule from parser
+ Method startRule = PoliqarpPlusParser.class.getMethod("request");
+ tree = (ParserRuleContext) startRule.invoke(poliqarpParser, (Object[])null);
+ }
+
+ // Some things went wrong ...
+ catch (Exception e) {
+ System.err.println( e.getMessage() );
+ }
+
+ // Return the generated tree
+ return tree;
+ }
+
+ public static void main(String[] args) {
+ /*
+ * For testing
+ */
+ String[] queries = new String[] {
+// "[base=foo]|([base=foo][base=bar])*",
+// "([base=foo]|[base=bar])[base=foobar]",
+// "[base=foo]([base=bar]|[base=foobar/i])",
+// "[base=bar|base=foo]",
+// "[base=bar]",
+// "[base=foo][base=bar]",
+// "[(base=bar|base=foo)&orth=wee]",
+// "[base=foo/i][base=bar]{2,4}",
+// "foo bar/i"
+ "{[base=foo]}[orth=bar]",
+ "{[base=foo]}{[orth=bar]}",
+ "{1:[base=foo]<np>}",
+ "shrink({[base=foo]}[orth=bar])",
+// "[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815"
+ };
+ for (String q : queries) {
+ try {
+ System.out.println(q);
+ System.out.println(PoliqarpPlusTree.parsePoliqarpQuery(q).toStringTree(PoliqarpPlusTree.poliqarpParser));
+ @SuppressWarnings("unused")
+ PoliqarpPlusTree pt = new PoliqarpPlusTree(q);
+// System.out.println(PoliqarpPlusTree.parsePoliqarpQuery(q).toStringTree(PoliqarpPlusTree.poliqarpParser));
+ System.out.println();
+
+ } catch (NullPointerException npe) {
+ npe.printStackTrace();
+ System.out.println("null\n");
+ }
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
new file mode 100644
index 0000000..2ab76da
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
@@ -0,0 +1,462 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.antlr.v4.runtime.ANTLRInputStream;
+import org.antlr.v4.runtime.BailErrorStrategy;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.tree.ParseTree;
+
+import de.ids_mannheim.korap.query.poliqarp.PoliqarpLexer;
+import de.ids_mannheim.korap.query.poliqarp.PoliqarpParser;
+
+/**
+ * Map representation of Poliqarp syntax tree as returned by ANTLR
+ * @author joachim
+ *
+ */
+public class PoliqarpTree extends AbstractSyntaxTree {
+
+ /**
+ * Top-level map representing the whole request.
+ */
+ LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
+ /**
+ * Keeps track of open node categories
+ */
+ LinkedList<String> openNodeCats = new LinkedList<String>();
+ /**
+ * Flag that indicates whether token fields or meta fields are currently being processed
+ */
+ boolean inMeta = false;
+ /**
+ * Parser object deriving the ANTLR parse tree.
+ */
+ static Parser poliqarpParser;
+ /**
+ * Keeps track of all visited nodes in a tree
+ */
+ List<ParseTree> visited = new ArrayList<ParseTree>();
+
+ /**
+ * Keeps track of active fields (like 'base=foo').
+ */
+ LinkedList<ArrayList<Object>> fieldStack = new LinkedList<ArrayList<Object>>();
+ /**
+ * Keeps track of active sequences.
+ */
+ LinkedList<LinkedHashMap<String,Object>> sequenceStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Keeps track of active tokens.
+ */
+ LinkedList<LinkedHashMap<String,Object>> tokenStack = new LinkedList<LinkedHashMap<String,Object>>();
+ /**
+ * Keeps track of sequence/token/field groups.
+ */
+ LinkedList<ArrayList<Object>> groupStack = new LinkedList<ArrayList<Object>>();
+ /**
+ * Marks the currently active object (sequence/token/group...) in order to know where to add stuff like occurrence info etc.
+ */
+ LinkedHashMap<String,Object> curObject = new LinkedHashMap<String,Object>();
+ /**
+ * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
+ */
+ LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
+
+ /**
+ *
+ * @param tree The syntax tree as returned by ANTLR
+ * @param parser The ANTLR parser instance that generated the parse tree
+ */
+ public PoliqarpTree(String query) {
+ this.query = query;
+ prepareContext();
+ process(query);
+ System.out.println(">>> "+requestMap+" <<<");
+ }
+
+ private void prepareContext() {
+ LinkedHashMap<String,Object> context = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> operands = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> relation = new LinkedHashMap<String,Object>();
+ LinkedHashMap<String,Object> classMap = new LinkedHashMap<String,Object>();
+
+ operands.put("@id", "korap:operands");
+ operands.put("@container", "@list");
+
+ relation.put("@id", "korap:relation");
+ relation.put("@type", "korap:relation#types");
+
+ classMap.put("@id", "korap:class");
+ classMap.put("@type", "xsd:integer");
+
+ context.put("korap", "http://korap.ids-mannheim.de/ns/query");
+ context.put("@language", "de");
+ context.put("operands", operands);
+ context.put("relation", relation);
+ context.put("class", classMap);
+ context.put("query", "korap:query");
+ context.put("filter", "korap:filter");
+ context.put("meta", "korap:meta");
+
+ requestMap.put("@context", context);
+ }
+
+ @Override
+ public Map<String, Object> getRequestMap() {
+ return this.requestMap;
+ }
+
+ @Override
+ public void process(String query) {
+ ParseTree tree = parsePoliqarpQuery(query);
+ System.out.println("Processing Poliqarp");
+ processNode(tree);
+ }
+
+ @SuppressWarnings("unchecked")
+ private void processNode(ParseTree node) {
+ // Top-down processing
+ if (visited.contains(node)) return;
+ else visited.add(node);
+
+ String nodeCat = getNodeCat(node);
+ openNodeCats.push(nodeCat);
+
+// System.out.println(openNodeCats);
+
+ /*
+ ****************************************************************
+ ****************************************************************
+ * Processing individual node categories *
+ ****************************************************************
+ ****************************************************************
+ */
+ if (nodeCat.equals("query")) {
+ }
+
+ // cq_segments/sq_segments: token group
+ if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
+ // disregard empty segments in simple queries (parsed by ANTLR as empty cq_segments)
+ if (node.getChildCount() > 0 && !node.getChild(0).toStringTree(poliqarpParser).equals(" ")) {
+ LinkedHashMap<String,Object> sequence = new LinkedHashMap<String,Object>();
+ curObject = sequence;
+ // Step I: decide type of element (one or more elements? -> token or sequence)
+ if (node.getChildCount()>1) {
+ sequence.put("@type", "korap:sequence");
+ ArrayList<Object> sequenceOperands = new ArrayList<Object>();
+ sequence.put("operands", sequenceOperands);
+ } else {
+ // if only child, make the sequence a mere korap:token
+ sequence.put("@type", "korap:token");
+ tokenStack.push(sequence);
+ }
+ // Step II: decide where to put this element (top query node or embedded in super sequence?)
+ if (openNodeCats.get(1).equals("query")) {
+ requestMap.put("query", sequence);
+ } else if (!groupStack.isEmpty()) {
+ groupStack.getFirst().add(sequence);
+ } else {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(sequence);
+ }
+ sequenceStack.push(sequence);
+ }
+ }
+
+ // cq_segment
+ if (nodeCat.equals("cq_segment")) {
+ // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
+ LinkedHashMap<String, Object> token;
+ if (tokenStack.isEmpty()) {
+ token = new LinkedHashMap<String, Object>();
+ tokenStack.push(token);
+ } else {
+ // in case cq_segments has already added the token
+ token = tokenStack.getFirst();
+ }
+ curObject = token;
+ curToken = token;
+
+ // Step II: start filling object and add to containing sequence
+ token.put("@type", "korap:token");
+ // add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
+ if (node.getParent().getChildCount()>1) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(token);
+ }
+ }
+
+ // disjoint cq_segments, like ([base=foo][base=bar])|[base=foobar]
+ if (nodeCat.equals("cq_disj_segments")) {
+ LinkedHashMap<String,Object> disjunction = new LinkedHashMap<String,Object>();
+ curObject = disjunction;
+ ArrayList<Object> disjOperands = new ArrayList<Object>();
+ disjunction.put("@type", "korap:group");
+ disjunction.put("relation", "or");
+ disjunction.put("operands", disjOperands);
+ groupStack.push(disjOperands);
+
+ // decide where to put the disjunction
+ if (openNodeCats.get(1).equals("query")) {
+ requestMap.put("query", disjunction);
+ } else if (openNodeCats.get(1).equals("cq_segments")) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(disjunction);
+ }
+ }
+
+ // field element (outside meta)
+ if (nodeCat.equals("field")) {
+ LinkedHashMap<String,Object> fieldMap = new LinkedHashMap<String,Object>();
+
+ // Step I: extract info
+ String featureName = node.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (field_name base) (field_op !=) (re_query "bar*")
+ String relation = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
+ String value = "";
+ ParseTree valNode = node.getChild(2);
+ String valType = getNodeCat(valNode);
+ fieldMap.put("@type", "korap:term");
+ if (valType.equals("simple_query")) {
+ value = valNode.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (simple_query (sq_segment foo))
+ } else if (valType.equals("re_query")) {
+ value = valNode.getChild(0).toStringTree(poliqarpParser); //e.g. (re_query "bar*")
+ fieldMap.put("@subtype", "korap:value#regex");
+ }
+ fieldMap.put("@value", featureName+":"+value);
+ fieldMap.put("relation", relation);
+
+ // Step II: decide where to put the field map (as the only value of a token or the meta filter or as a part of a group in case of coordinated fields)
+ if (fieldStack.isEmpty()) {
+ if (!inMeta) {
+ tokenStack.getFirst().put("@value", fieldMap);
+ } else {
+ ((HashMap<String, Object>) requestMap.get("meta")).put("@value", fieldMap);
+ }
+ } else {
+ fieldStack.getFirst().add(fieldMap);
+ }
+ visited.add(node.getChild(0));
+ visited.add(node.getChild(1));
+ visited.add(node.getChild(2));
+ }
+
+ // conj_field serves for both conjunctions and disjunctions
+ if (nodeCat.equals("conj_field")) {
+ LinkedHashMap<String,Object> group = new LinkedHashMap<String,Object>();
+ ArrayList<Object> groupOperands = new ArrayList<Object>();
+
+ group.put("@type", "korap:group");
+ group.put("operands", groupOperands);
+ fieldStack.push(groupOperands);
+
+ // Step I: get operator (& or |)
+ ParseTree operatorNode = node.getChild(1).getChild(0);
+ String operator = getNodeCat(operatorNode);
+ if (operator.equals("|")) {
+ group.put("relation", "or");
+ } else if (operator.equals("&")) {
+ group.put("relation", "and");
+ }
+
+ // Step II: decide where to put the group (directly under token or in top meta filter section or embed in super group)
+ if (openNodeCats.get(1).equals("cq_segment")) {
+ tokenStack.getFirst().put("@value", group);
+ } else if (openNodeCats.get(1).equals("meta_field_group")) {
+ ((HashMap<String, Object>) requestMap.get("meta")).put("@value", group);
+ } else {
+ fieldStack.get(1).add(group);
+ }
+ // skip the operator
+ visited.add(node.getChild(1));
+ }
+
+
+ if (nodeCat.equals("sq_segment")) {
+ // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
+ LinkedHashMap<String, Object> token;
+ if (tokenStack.isEmpty()) {
+ token = new LinkedHashMap<String, Object>();
+ tokenStack.push(token);
+ } else {
+ // in case sq_segments has already added the token
+ token = tokenStack.getFirst();
+ }
+ curObject = token;
+ curToken = token;
+ // Step II: fill object (token values) and put into containing sequence
+ token.put("@type", "korap:token");
+ String word = node.getChild(0).toStringTree(poliqarpParser);
+ LinkedHashMap<String,Object> tokenValues = new LinkedHashMap<String,Object>();
+ token.put("@value", tokenValues);
+ tokenValues.put("orth", word);
+ tokenValues.put("relation", "=");
+
+ // add token to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the values from "field")
+ if (node.getParent().getChildCount()>1) {
+ ArrayList<Object> topSequenceOperands = (ArrayList<Object>) sequenceStack.getFirst().get("operands");
+ topSequenceOperands.add(token);
+ }
+ }
+
+ // repetition of token group
+ if (nodeCat.equals("occ")) {
+ ParseTree occChild = node.getChild(0);
+ String repetition = occChild.toStringTree(poliqarpParser);
+ curObject.put("repetition", repetition);
+ visited.add(occChild);
+ }
+
+ // flags for case sensitivity and whole-word-matching
+ if (nodeCat.equals("flag")) {
+ String flag = getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
+ // add to current token's value
+ ((HashMap<String, Object>) curToken.get("@value")).put("flag", flag);
+ }
+
+ if (nodeCat.equals("meta")) {
+ inMeta=true;
+ LinkedHashMap<String,Object> metaFilter = new LinkedHashMap<String,Object>();
+ requestMap.put("meta", metaFilter);
+ metaFilter.put("@type", "korap:meta");
+ }
+
+
+
+ if (nodeCat.equals("within")) {
+ ParseTree domainNode = node.getChild(2);
+ String domain = getNodeCat(domainNode);
+// queryOperands.add("within:"+domain);
+ curObject.put("within", domain);
+ visited.add(node.getChild(0));
+ visited.add(node.getChild(1));
+ visited.add(domainNode);
+ }
+
+ /*
+ ****************************************************************
+ ****************************************************************
+ * recursion until 'request' node (root of tree) is processed *
+ * **************************************************************
+ ****************************************************************
+ */
+ for (int i=0; i<node.getChildCount(); i++) {
+ ParseTree child = node.getChild(i);
+ processNode(child);
+ }
+
+ // Stuff that happens when leaving a node (taking it off the stack)
+ if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
+ // exclude whitespaces analysed as empty cq_segments
+ if (node.getChildCount() > 0 && !getNodeCat(node.getChild(0)).equals(" ")) {
+ sequenceStack.pop();
+ }
+ }
+
+ if (nodeCat.equals("cq_disj_segments")) {
+ groupStack.pop();
+ }
+
+ if (nodeCat.equals("cq_segment") || nodeCat.equals("sq_segment")){
+ tokenStack.pop();
+ }
+
+ if (nodeCat.equals("conj_field")) {
+ fieldStack.pop();
+ }
+
+ openNodeCats.pop();
+
+ }
+
+ /**
+ * Returns the category (or 'label') of the root of a ParseTree.
+ * @param node
+ * @return
+ */
+ public String getNodeCat(ParseTree node) {
+ String nodeCat = node.toStringTree(poliqarpParser);
+ Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
+ Matcher m = p.matcher(node.toStringTree(poliqarpParser));
+ if (m.find()) {
+ nodeCat = m.group(1);
+ }
+ return nodeCat;
+ }
+
+ private static ParserRuleContext parsePoliqarpQuery (String p) {
+ Lexer poliqarpLexer = new PoliqarpLexer((CharStream)null);
+ ParserRuleContext tree = null;
+ // Like p. 111
+ try {
+
+ // Tokenize input data
+ ANTLRInputStream input = new ANTLRInputStream(p);
+ poliqarpLexer.setInputStream(input);
+ CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
+ poliqarpParser = new PoliqarpParser(tokens);
+
+ // Don't throw out erroneous stuff
+ poliqarpParser.setErrorHandler(new BailErrorStrategy());
+ poliqarpParser.removeErrorListeners();
+
+ // Get starting rule from parser
+ Method startRule = PoliqarpParser.class.getMethod("request");
+ tree = (ParserRuleContext) startRule.invoke(poliqarpParser, (Object[])null);
+ }
+
+ // Some things went wrong ...
+ catch (Exception e) {
+ System.err.println( e.getMessage() );
+ }
+
+ // Return the generated tree
+ return tree;
+ }
+
+ public static void main(String[] args) {
+ /*
+ * For testing
+ */
+ String[] queries = new String[] {
+// "[base=foo]|([base=foo][base=bar])*",
+// "([base=foo]|[base=bar])[base=foobar]",
+// "[base=foo]([base=bar]|[base=foobar/i])",
+// "[base=bar|base=foo]",
+// "[base=bar]",
+// "[base=foo][base=bar]",
+// "[(base=bar|base=foo)&orth=wee]",
+// "[base=foo/i][base=bar]{2,4}",
+// "foo bar/i"
+ "[base=foo] meta author=Goethe&year=1885",
+ "[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815"
+ };
+ for (String q : queries) {
+ try {
+ System.out.println(q);
+ System.out.println(PoliqarpTree.parsePoliqarpQuery(q).toStringTree(PoliqarpTree.poliqarpParser));
+ @SuppressWarnings("unused")
+ PoliqarpTree pt = new PoliqarpTree(q);
+ System.out.println(PoliqarpTree.parsePoliqarpQuery(q).toStringTree(PoliqarpTree.poliqarpParser));
+ System.out.println();
+
+ } catch (NullPointerException npe) {
+ npe.printStackTrace();
+ System.out.println("null\n");
+ }
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryParser.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryParser.java
new file mode 100644
index 0000000..078e5da
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryParser.java
@@ -0,0 +1,138 @@
+package de.ids_mannheim.korap.query.serialize;
+
+import de.ids_mannheim.korap.query.poliqarp.PoliqarpParser;
+import de.ids_mannheim.korap.query.poliqarp.PoliqarpLexer;
+import de.ids_mannheim.korap.query.cosmas2.*;
+
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.v4.runtime.tree.Tree;
+import org.antlr.v4.runtime.*;
+import java.lang.reflect.Method;
+
+ public class QueryParser {
+ // New lexer object
+ static Lexer poliqarpLexer = new PoliqarpLexer((CharStream)null);
+ static c2psLexer cosmasLexer = new c2psLexer();
+
+ static PoliqarpParser poliqarpParser = null;
+ static c2psParser cosmasParser = null;
+
+ /**
+ * Parse a regex and return the generated tree string
+ */
+ public static ParserRuleContext parse (String ql, String p) {
+ if (ql == "poliqarp") {
+ return parsePoliqarpQuery(p);
+ } else if (ql == "cosmas") {
+ return (ParserRuleContext) parseCosmasQuery(p);
+ } else {
+ throw new IllegalArgumentException( "Please specify correct QL");
+ }
+ }
+
+ private static Tree parseCosmasQuery(String p) {
+ Tree tree = null;
+ ANTLRStringStream
+ ss = new ANTLRStringStream(p);
+ c2psLexer
+ lex = new c2psLexer(ss);
+ org.antlr.runtime.CommonTokenStream tokens = //v3
+ new org.antlr.runtime.CommonTokenStream(lex);
+ cosmasParser = new c2psParser(tokens);
+ c2psParser.c2ps_query_return
+ c2Return = null;
+
+
+
+ try
+ {
+ c2Return = cosmasParser.c2ps_query(); // statt t().
+ }
+ catch (RecognitionException e)
+ {
+ e.printStackTrace();
+ }
+
+ // AST Tree anzeigen:
+ tree = (Tree)c2Return.getTree();
+ return tree;
+ }
+
+ private static ParserRuleContext parsePoliqarpQuery (String p) {
+ ParserRuleContext tree = null;
+ // Like p. 111
+ try {
+
+ // Tokenize input data
+ ANTLRInputStream input = new ANTLRInputStream(p);
+ poliqarpLexer.setInputStream(input);
+ CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
+ poliqarpParser = new PoliqarpParser(tokens);
+
+ // Don't throw out erroneous stuff
+ poliqarpParser.setErrorHandler(new BailErrorStrategy());
+ poliqarpParser.removeErrorListeners();
+
+ // Get starting rule from parser
+ Method startRule = PoliqarpParser.class.getMethod("request");
+ tree = (ParserRuleContext) startRule.invoke(poliqarpParser, (Object[])null);
+ }
+
+ // Some things went wrong ...
+ catch (Exception e) {
+ System.err.println( e.getMessage() );
+ }
+
+ // Return the generated tree as a string
+ return tree;
+ }
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ /*
+ * for testing...
+ */
+ String[] queries = new String[] {
+// "[orth=korpus][base=korpus]",
+// "korpus [] language",
+// "[orth=\"bez.?\"/i] ",
+// "[orth=Przyszedł/i]",
+// "[orth=się][][][base=bać]",
+// "[orth=Abc]{2,4}",
+// "[orth=się][pos!=interp]{,5}[base=bać]|[base=bać][base=\"on|ja|ty|my|wy\"]?[orth=się]",
+// "\"(la){3,}\"/x ",
+// "[orth=korpus]+[pos=n]",
+// "[orth=korpus]+[pos=n] within s",
+// "[base=on & orth=ja] ",
+// "[base=\"ja|on\"] ",
+// "[orth=ja]{2,4}",
+// "[orth=ja]{2,}",
+// "[orth=ja]{,4}",
+// "[orth=ja]+",
+// "ja",
+// "ja ne",
+// "[base=in]",
+// "([orth=foo][base=bar])*",
+// "[orth=foo][base!=\"bar*\"]",
+// "[cas==nom/xi]",
+// "[base=foo|base=bar]"
+ "&Word"
+ };
+
+ for (String q : queries) {
+ try {
+ System.out.println(q);
+ System.out.println(parseCosmasQuery(q));
+// System.out.println(parsePoliqarpQuery(q).toStringTree(poliqarpParser));
+// System.out.println(parsePoliqarpQuery(q).getChild(0).toStringTree(poliqarpParser));
+ System.out.println();
+ } catch (NullPointerException npe) {
+ System.out.println("null\n");
+ }
+
+ }
+ }
+
+}