Make Maven install work for KorAP-querySerialization
diff --git a/pom.xml b/pom.xml
index bca12ca..a4a0098 100644
--- a/pom.xml
+++ b/pom.xml
@@ -34,6 +34,16 @@
<classifier>complete</classifier>
</dependency>
<dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ <version>1.9.2</version>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ <version>1.9.2</version>
+ </dependency>
+ <dependency>
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-PoliqarpParser</artifactId>
<version>0.05</version>
@@ -50,6 +60,9 @@
<configuration>
<source>1.7</source>
<target>1.7</target>
+ <excludes>
+ <!--exclude>**/CosmasTree.java</exclude-->
+ </excludes>
</configuration>
</plugin>
</plugins>
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
index 5876d41..cf19977 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/AbstractSyntaxTree.java
@@ -4,8 +4,6 @@
public abstract class AbstractSyntaxTree {
- String query;
-
public abstract Map<String, Object> getRequestMap();
public abstract void process(String query);
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
deleted file mode 100644
index ee8a5f2..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CosmasTree.java
+++ /dev/null
@@ -1,350 +0,0 @@
-package de.ids_mannheim.korap.query.serialize;
-
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.RecognitionException;
-import org.antlr.runtime.tree.Tree;
-
-import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
-import de.ids_mannheim.korap.query.cosmas2.c2psParser;
-
-/**
- * Map representation of CosmasII syntax tree as returned by ANTLR
- * @author joachim
- *
- */
-public class CosmasTree extends AbstractSyntaxTree {
-
- private static c2psParser cosmasParser;
- /*
- * Following collections have the following functions:
- * - the request is a map with two keys (meta/query): {meta=[], query=[]}
- * - the query is a list of token group maps: {meta=[], query=[tg1=[], tg2=[]]}
- * - each token group is a list of tokens: {meta=[], query=[tg1=[t1_1, t1_2], tg2=[t2_1, t2_2, t2_3]]}
- * - each token corresponds to a single 'fields' linked list {meta=[], query=[tg1=[t1_1=[], t1_2=[]], ... ]}
- * - each fields list contains a logical operator and 'field maps' defining attributes and values
- * {meta=[], query=[tg1=[t1_1=[[disj, {base=foo}, {base=bar}]], t1_2=[]], ... ]}
- */
- String query;
- LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
- LinkedHashMap<String,Object> queryMap = new LinkedHashMap<String,Object>();
- LinkedHashMap<String,Object> tokenGroup = new LinkedHashMap<String,Object>();
- ArrayList<Object> fieldGroup = new ArrayList<Object>();
- LinkedHashMap<String,Object> fieldMap;
- ArrayList<List<Object>> distantTokens;
- /**
- * Makes it possible to store several distantTokenGroups
- */
- LinkedList<ArrayList<List<Object>>> distantTokensStack = new LinkedList<ArrayList<List<Object>>>();
- /**
- * Field for repetition query (Kleene + or * operations, or min/max queries: {2,4}
- */
- String repetition = "";
- int tokenCount=0;
- int tokenGroupCount=0;
- /**
- * Keeps track of open node categories
- */
- LinkedList<String> openNodeCats = new LinkedList<String>();
- /**
- * Global control structure for fieldGroups, keeps track of open fieldGroups.
- */
- LinkedList<ArrayList<Object>> openFieldGroups = new LinkedList<ArrayList<Object>>();
- /**
- * Global control structure for tokenGroups, keeps track of open tokenGroups.
- */
- LinkedList<LinkedHashMap<String,Object>> tokenGroupsStack = new LinkedList<LinkedHashMap<String,Object>>();
- /**
- * Flag that indicates whether token fields or meta fields are currently being processed
- */
- boolean inMeta = false;
- boolean negate = false;
-
- Tree cosmasTree;
-
- LinkedHashMap<String,Object> treeMap = new LinkedHashMap<String,Object>();
- /**
- * Keeps track of all visited nodes in a tree
- */
- List<Tree> visited = new ArrayList<Tree>();
-
-
- /**
- *
- * @param tree The syntax tree as returned by ANTLR
- * @param parser The ANTLR parser instance that generated the parse tree
- */
- public CosmasTree(String query) {
- this.query = query;
- process(query);
- System.out.println(requestMap);
- }
-
- @Override
- public Map<String, Object> getRequestMap() {
- return this.requestMap;
- }
-
- @Override
- public void process(String query) {
- Tree tree = parseCosmasQuery(query);
- System.out.println("Processing Cosmas");
- processNode(tree);
- }
-
- private void processNode(Tree node) {
-
- // Top-down processing
- if (visited.contains(node)) return;
- else visited.add(node);
-
-
- String nodeCat = getNodeCat(node);
- openNodeCats.push(nodeCat);
-
-
-
- System.out.println(openNodeCats);
- System.out.println(distantTokensStack);
-
- /* ***************************************
- * Processing individual node categories *
- *****************************************/
- // C2QP is tree root
- if (nodeCat.equals("C2PQ")) {
- queryMap = new LinkedHashMap<String,Object>();
- requestMap.put("query", queryMap);
- }
-
- // Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry
- if (nodeCat.equals("OPWF") || nodeCat.equals("OPLEM") || nodeCat.equals("OPMORPH")) {
-
- if (tokenGroupsStack.isEmpty()) {
- tokenGroup = new LinkedHashMap<String, Object>();
- tokenCount=0;
- tokenGroupCount++;
- queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
- tokenGroupsStack.push(tokenGroup);
- } else {
- tokenGroup = tokenGroupsStack.getFirst();
- }
-
- // check if this token comes after a distant operator (like "/+w3:4") and if yes,
- // insert the empty tokenGroups before the current token
- if (openNodeCats.get(1).equals("ARG2")) {
- if (openNodeCats.get(2).equals("OPPROX") && !distantTokensStack.isEmpty()) {
- for (List<Object> distantTokenGroup : distantTokensStack.pop()) {
-// if (tokenGroupsStack.isEmpty()) {
-// queryMap.put("token"+tokenGroupCount+"_1", distantTokenGroup);
-// } else {
- tokenCount++;
- tokenGroupsStack.getFirst().put("token"+tokenGroupCount+"_"+tokenCount, distantTokenGroup);
-// }
-// tokenGroupCount++;
- }
- }
- // check negation of token by preceding OPNOT
-// else if (openNodeCats.get(2).equals("OPNOT")) {
-// negate = true;
-// }
- }
-
- fieldGroup = new ArrayList<Object>();
- tokenCount++;
- tokenGroup.put("token"+tokenGroupCount+"_"+tokenCount, fieldGroup);
-
- fieldMap = new LinkedHashMap<String, Object>();
- fieldGroup.add(fieldMap);
-
- // make category-specific fieldMap entry
- if (nodeCat.equals("OPWF")) {
- fieldMap.put("form", node.getChild(0).toStringTree());
- }
- if (nodeCat.equals("OPLEM")) {
- fieldMap.put("lemma", node.getChild(0).toStringTree());
- }
- if (nodeCat.equals("OPMORPH")) {
- fieldMap.put("morph", node.toStringTree());
- //TODO decompose morphology query
- }
- // negate field (see above)
- if (negate) {
- fieldMap.put("relation", "!=");
- }
-
-// tokenGroupsStack.push(tokenGroup);
- }
-
- // negate every token that's under OPNOT > ARG2
- if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
- negate = true;
- }
-
- if (nodeCat.equals("OPOR")) {
- tokenGroup = new LinkedHashMap<String, Object>();
- tokenCount=0;
- tokenGroupCount++;
- if (tokenGroupsStack.isEmpty()) {
- queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
- } else {
- tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
- }
- tokenGroup.put("type", "disj");
- tokenGroupsStack.push(tokenGroup);
- }
-
- if (nodeCat.equals("OPAND")) {
- tokenGroup = new LinkedHashMap<String, Object>();
- tokenCount=0;
- tokenGroupCount++;
- if (tokenGroupsStack.isEmpty()) {
- queryMap.put("tokenGroup"+tokenGroupCount, tokenGroup);
- } else {
- tokenGroupsStack.getFirst().put("tokenGroup"+tokenGroupCount, tokenGroup);
- }
- tokenGroup.put("type", "conj");
- tokenGroupsStack.push(tokenGroup);
- }
-
- if (nodeCat.equals("OPPROX")) {
- distantTokens = new ArrayList<List<Object>>();
- Tree prox_opts = node.getChild(0);
- Tree typ = prox_opts.getChild(0);
- System.err.println(typ.getChild(0).toStringTree());
- Tree dist_list = prox_opts.getChild(1);
- // get relevant information
- String direction = dist_list.getChild(0).getChild(0).getChild(0).toStringTree();
- String min = dist_list.getChild(0).getChild(1).getChild(0).toStringTree();
- String max = dist_list.getChild(0).getChild(1).getChild(1).toStringTree();
- if (min.equals("VAL0")) {
- min=max;
- }
- // create empty tokens and put them on the stack to place them between arg1 and arg2
- for (int i=0; i<Integer.parseInt(max)-1; i++) {
- ArrayList<Object> emptyToken = new ArrayList<Object>();
- LinkedHashMap<String,Object> emptyFieldMap = new LinkedHashMap<String,Object>();
- emptyToken.add(emptyFieldMap);
- tokenGroup.put("token"+tokenGroupCount+"_1", emptyToken);
- // mark all tokens between min and max optional
- if (i>=Integer.parseInt(min)) {
- emptyFieldMap.put("optional", "true");
- }
- distantTokens.add(emptyToken);
- }
- distantTokensStack.push(distantTokens);
- }
-
-
-// System.err.println(tokenGroupsStack.size()+" "+tokenGroupsStack);
- // recursion until 'query' node (root of tree) is processed
- for (int i=0; i<node.getChildCount(); i++) {
- Tree child = node.getChild(i);
- processNode(child);
- }
-
- if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
- negate = false;
- }
-
- if (nodeCat.equals("OPAND") || nodeCat.equals("OPOR")) {
- tokenGroupsStack.pop();
-// tokenGroupCount--;
-// tokenCount=0;
- }
-
- openNodeCats.pop();
-
- }
-
- /**
- * Returns the category (or 'label') of the root of a ParseTree.
- * @param node
- * @return
- */
- public String getNodeCat(Tree node) {
- String nodeCat = node.toStringTree();
- Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace
- Matcher m = p.matcher(node.toStringTree());
- if (m.find()) {
- nodeCat = m.group(1);
- }
- return nodeCat;
- }
-
- private static Tree parseCosmasQuery(String p) {
- Tree tree = null;
- ANTLRStringStream
- ss = new ANTLRStringStream(p);
- c2psLexer
- lex = new c2psLexer(ss);
- org.antlr.runtime.CommonTokenStream tokens = //v3
- new org.antlr.runtime.CommonTokenStream(lex);
- cosmasParser = new c2psParser(tokens);
- c2psParser.c2ps_query_return
- c2Return = null;
- try
- {
- c2Return = cosmasParser.c2ps_query(); // statt t().
- }
- catch (RecognitionException e)
- {
- e.printStackTrace();
- }
- // AST Tree anzeigen:
- tree = (Tree)c2Return.getTree();
- return tree;
- }
-
- /**
- * @param args
- */
- public static void main(String[] args) {
- /*
- * For testing
- */
- String[] queries = new String[] {
- /* COSMAS 2 */
-// "&Mond",
-// "Mond Sterne",
-// "Mond*",
-// "Mond oder Sterne",
-// "(des oder eines) /+w2 (Bauern oder Bauers oder Bauerns)",
-// "(Sonne /+w2 Mond) /+w2:3 Sterne",
-// "Mond oder Sonne /w2 Sterne",
-// "MORPH(V PCP)",
-// "MORPH(V PCP) Baum" ,
-// "Sonne %w2 Mond",
-// "Sonne /w2 Mond",
-// "Sonne nicht (Mond Stern)",
-// "Sonne nicht (Mond oder Stern)",
-// "Sonne /+w1:4 Mond",
- "(sonne und mond) oder sterne",
- "(stern oder (sonne und mond)) und MORPH(V PCP)",
- "(sonne und (stern oder mond)) /+w2 luna???",
- "(Tag /+w2 $offenen) /+w1 Tür",
- "heißt /+w2 \"und\" ,"
- };
- for (String q : queries) {
- try {
- System.out.println(q);
- System.out.println(parseCosmasQuery(q).toStringTree());
- CosmasTree act = new CosmasTree(q);
- System.out.println();
-
- } catch (NullPointerException npe) {
- npe.printStackTrace();
- System.out.println("null\n");
- }
- }
- }
-
-
-}
-
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java b/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java
index 9661e5e..10c0be9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java
@@ -8,6 +8,9 @@
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
+import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
+import de.ids_mannheim.korap.query.serialize.PoliqarpPlusTree;
+
public class JsonGenerator {
ObjectMapper mapper;
@@ -19,9 +22,9 @@
public void run(String outFile, String query, String queryLanguage) throws JsonGenerationException, JsonMappingException, IOException {
if (queryLanguage.equals("poliqarp")) {
- ast = new PoliqarpTree(query);
- } else if (queryLanguage.equals("cosmas")) {
- ast = new CosmasTree(query);
+ ast = new PoliqarpPlusTree(query);
+// } else if (queryLanguage.equals("cosmas")) {
+// ast = new CosmasTree(query);
}
Map<String, Object> requestMap = ast.getRequestMap();
mapper.writeValue(new File(outFile), requestMap);
@@ -36,12 +39,17 @@
*/
JsonGenerator jg = new JsonGenerator();
int i=0;
- String[] queries = new String[] {
-
+ String[] queries;
+ if (args.length==0) {
+ queries = new String[] {
"[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815",
"([base=foo]|[base=foo])[base=foobar]",
"[base=foo]([base=foo]|[base=foobar])",
};
+ } else {
+ queries = new String[] {args[0]};
+ }
+
for (String q : queries) {
i++;
try {
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index 2d05d06..ff6fe90 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -19,8 +19,11 @@
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
-import de.ids_mannheim.korap.query.poliqarpplus.PoliqarpPlusLexer;
-import de.ids_mannheim.korap.query.poliqarpplus.PoliqarpPlusParser;
+//import de.ids_mannheim.korap.query.poliqarpplus.PoliqarpPlusLexer;
+//import de.ids_mannheim.korap.query.poliqarpplus.PoliqarpPlusParser;
+import de.ids_mannheim.korap.query.PoliqarpPlusLexer;
+import de.ids_mannheim.korap.query.PoliqarpPlusParser;
+import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
/**
* Map representation of Poliqarp syntax tree as returned by ANTLR
@@ -85,7 +88,6 @@
* @param parser The ANTLR parser instance that generated the parse tree
*/
public PoliqarpPlusTree(String query) {
- this.query = query;
prepareContext();
process(query);
System.out.println(">>> "+requestMap.get("query")+" <<<");
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
index 2ab76da..4913ab2 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpTree.java
@@ -19,8 +19,11 @@
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpLexer;
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpParser;
+//import de.ids_mannheim.korap.query.poliqarp.PoliqarpLexer;
+//import de.ids_mannheim.korap.query.poliqarp.PoliqarpParser;
+import de.ids_mannheim.korap.query.PoliqarpLexer;
+import de.ids_mannheim.korap.query.PoliqarpParser;
+import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
/**
* Map representation of Poliqarp syntax tree as returned by ANTLR
@@ -81,7 +84,6 @@
* @param parser The ANTLR parser instance that generated the parse tree
*/
public PoliqarpTree(String query) {
- this.query = query;
prepareContext();
process(query);
System.out.println(">>> "+requestMap+" <<<");
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryParser.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryParser.java
deleted file mode 100644
index 078e5da..0000000
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryParser.java
+++ /dev/null
@@ -1,138 +0,0 @@
-package de.ids_mannheim.korap.query.serialize;
-
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpParser;
-import de.ids_mannheim.korap.query.poliqarp.PoliqarpLexer;
-import de.ids_mannheim.korap.query.cosmas2.*;
-
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.RecognitionException;
-import org.antlr.v4.runtime.tree.Tree;
-import org.antlr.v4.runtime.*;
-import java.lang.reflect.Method;
-
- public class QueryParser {
- // New lexer object
- static Lexer poliqarpLexer = new PoliqarpLexer((CharStream)null);
- static c2psLexer cosmasLexer = new c2psLexer();
-
- static PoliqarpParser poliqarpParser = null;
- static c2psParser cosmasParser = null;
-
- /**
- * Parse a regex and return the generated tree string
- */
- public static ParserRuleContext parse (String ql, String p) {
- if (ql == "poliqarp") {
- return parsePoliqarpQuery(p);
- } else if (ql == "cosmas") {
- return (ParserRuleContext) parseCosmasQuery(p);
- } else {
- throw new IllegalArgumentException( "Please specify correct QL");
- }
- }
-
- private static Tree parseCosmasQuery(String p) {
- Tree tree = null;
- ANTLRStringStream
- ss = new ANTLRStringStream(p);
- c2psLexer
- lex = new c2psLexer(ss);
- org.antlr.runtime.CommonTokenStream tokens = //v3
- new org.antlr.runtime.CommonTokenStream(lex);
- cosmasParser = new c2psParser(tokens);
- c2psParser.c2ps_query_return
- c2Return = null;
-
-
-
- try
- {
- c2Return = cosmasParser.c2ps_query(); // statt t().
- }
- catch (RecognitionException e)
- {
- e.printStackTrace();
- }
-
- // AST Tree anzeigen:
- tree = (Tree)c2Return.getTree();
- return tree;
- }
-
- private static ParserRuleContext parsePoliqarpQuery (String p) {
- ParserRuleContext tree = null;
- // Like p. 111
- try {
-
- // Tokenize input data
- ANTLRInputStream input = new ANTLRInputStream(p);
- poliqarpLexer.setInputStream(input);
- CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
- poliqarpParser = new PoliqarpParser(tokens);
-
- // Don't throw out erroneous stuff
- poliqarpParser.setErrorHandler(new BailErrorStrategy());
- poliqarpParser.removeErrorListeners();
-
- // Get starting rule from parser
- Method startRule = PoliqarpParser.class.getMethod("request");
- tree = (ParserRuleContext) startRule.invoke(poliqarpParser, (Object[])null);
- }
-
- // Some things went wrong ...
- catch (Exception e) {
- System.err.println( e.getMessage() );
- }
-
- // Return the generated tree as a string
- return tree;
- }
- /**
- * @param args
- */
- public static void main(String[] args) {
- /*
- * for testing...
- */
- String[] queries = new String[] {
-// "[orth=korpus][base=korpus]",
-// "korpus [] language",
-// "[orth=\"bez.?\"/i] ",
-// "[orth=Przyszedł/i]",
-// "[orth=się][][][base=bać]",
-// "[orth=Abc]{2,4}",
-// "[orth=się][pos!=interp]{,5}[base=bać]|[base=bać][base=\"on|ja|ty|my|wy\"]?[orth=się]",
-// "\"(la){3,}\"/x ",
-// "[orth=korpus]+[pos=n]",
-// "[orth=korpus]+[pos=n] within s",
-// "[base=on & orth=ja] ",
-// "[base=\"ja|on\"] ",
-// "[orth=ja]{2,4}",
-// "[orth=ja]{2,}",
-// "[orth=ja]{,4}",
-// "[orth=ja]+",
-// "ja",
-// "ja ne",
-// "[base=in]",
-// "([orth=foo][base=bar])*",
-// "[orth=foo][base!=\"bar*\"]",
-// "[cas==nom/xi]",
-// "[base=foo|base=bar]"
- "&Word"
- };
-
- for (String q : queries) {
- try {
- System.out.println(q);
- System.out.println(parseCosmasQuery(q));
-// System.out.println(parsePoliqarpQuery(q).toStringTree(poliqarpParser));
-// System.out.println(parsePoliqarpQuery(q).getChild(0).toStringTree(poliqarpParser));
- System.out.println();
- } catch (NullPointerException npe) {
- System.out.println("null\n");
- }
-
- }
- }
-
-}