some more negation support: not(p and q) -> (not p) or (not q)

commit: f143ac9ebabdf74e9b433bc6e5c646abb41f3eb4 [log] [tgz]
author: Joachim Bingel <joa.bingel@gmail.com> Wed Dec 04 18:52:54 2013 +0000
committer: Joachim Bingel <joa.bingel@gmail.com> Wed Dec 04 18:52:54 2013 +0000
tree: b463fb96f19edd8b951c0cd4fc8badb1580b61e3
parent: 1417e1906a3a7ec5c89cf8024ebc3a7ff2d11591 [diff]
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java b/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java
index c4043e3..e8a8c33 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/JsonGenerator.java

@@ -57,10 +57,31 @@
 		String[] queries;
 		if (args.length==0) {
 			queries = new String[] {
-					"shrink({[base=foo]})",
+					/*
+					 * negation
+					 * elemente
+					 * within
+					 * regex
+					 * & field_group
+					 */
+					"[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815",
+					"([base=foo]|[base=bar])[base=foobar]",
+					"shrink({[base=Mann]})",
 					"shrink({[base=foo]}[orth=bar])",
 					"shrink(1:[base=Der]{1:[base=Mann]})",
-					"([base=foo][base=bar])*",
+					
+					"[base=Katze]",
+					"[base!=Katze]",
+					"[!base=Katze]",
+					"[base=Katze&orth=Katzen]",
+					"[base=Katze][orth=und][orth=Hunde]",
+					"[!(base=Katze&orth=Katzen)]",
+					"contains(<np>,[base=Mann])",
+					"startswith(<np>,[!pos=Det])",
+					"'vers{2,3}uch'",
+					"[orth='vers.*ch']",
+					"[(base=bar|base=foo)&orth=foobar]",
+					
 				};
 		} else {
 			queries = new String[] {args[0]};

diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index 6e50e62..d934e02 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java

@@ -51,6 +51,10 @@
 	 */
 	boolean cqHasOcc = false;
 	/**
+	 * Flag for negation of complete field
+	 */
+	boolean negField = false;
+	/**
 	 * Parser object deriving the ANTLR parse tree.
 	 */
 	static Parser poliqarpParser;
@@ -143,7 +147,7 @@
 	@Override
 	public void process(String query) {
 		ParseTree tree = parsePoliqarpQuery(query);
-		System.out.println("Processing Poliqarp");
+		System.out.println("Processing PoliqarpPlus");
 		processNode(tree);
 	}
 	
@@ -168,7 +172,7 @@
 		Integer stackedObjects = 0;
 		Integer stackedTokens= 0;
 		Integer stackedFields = 0;
-		
+//		
 //		System.err.println(objectStack);
 //		System.out.println(openNodeCats);
 //		System.out.println(" "+requestMap.get("query")+"");
@@ -230,7 +234,8 @@
 						}
 					} else if (!objectStack.isEmpty()){
 						// embed in super sequence
-						ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+						int occExtraChild = cqHasOcc ? 1:0;
+						ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1+occExtraChild).get("operands");
 						topSequenceOperands.add(occGroup);
 					}
 				}
@@ -294,13 +299,14 @@
 				token = new LinkedHashMap<String, Object>();
 				tokenStack.push(token);
 				stackedTokens++;
+				// do this only if token is newly created, otherwise it'll be in objectStack twice
+				objectStack.push(token);
+				stackedObjects++;
 			} else {
 				// in case cq_segments has already added the token
 				token = tokenStack.getFirst();
 			}
 			curToken = token;
-			objectStack.push(token);
-			stackedObjects++;
 			// Step II: start filling object and add to containing sequence
 			token.put("@type", "korap:token");
 			// add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
@@ -336,6 +342,13 @@
 			// Step I: extract info
 			String featureName = node.getChild(0).getChild(0).toStringTree(poliqarpParser);   //e.g. (field_name base) (field_op !=) (re_query "bar*")
 			String relation = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
+			if (negField) {
+				if (relation.startsWith("!")) {
+					relation = relation.substring(1);
+				} else {
+					relation = "!"+relation;
+				}
+			}
 			String value = "";
 			ParseTree valNode = node.getChild(2);
 			String valType = getNodeCat(valNode);
@@ -363,6 +376,10 @@
 			visited.add(node.getChild(2));
 		}
 		
+		if (nodeCat.equals("neg_field") || nodeCat.equals("neg_field_group")) {
+			negField=!negField;
+		}
+		
 		// conj_field serves for both conjunctions and disjunctions
 		if (nodeCat.equals("conj_field")) {
 			LinkedHashMap<String,Object> group = new LinkedHashMap<String,Object>(); 
@@ -374,18 +391,20 @@
 			// Step I: get operator (& or |)
 			ParseTree operatorNode = node.getChild(1).getChild(0);
 			String operator = getNodeCat(operatorNode);
-			if (operator.equals("|")) {
-				group.put("relation", "or");
-			} else if (operator.equals("&")) {
-				group.put("relation", "and");
+			String relation = operator.equals("&") ? "and" : "or";
+			if (negField) {
+				relation = relation.equals("or") ? "and": "or";
 			}
+			group.put("relation", relation);
 			// Step II: decide where to put the group (directly under token or in top meta filter section or embed in super group)
 			if (openNodeCats.get(1).equals("cq_segment")) {
 				tokenStack.getFirst().put("@value", group);
 			} else if (openNodeCats.get(1).equals("meta_field_group")) {
 				((HashMap<String, Object>) requestMap.get("meta")).put("@value", group);
-			} else {
+			} else if (openNodeCats.get(2).equals("conj_field")) {
 				fieldStack.get(1).add(group);
+			} else {
+				tokenStack.getFirst().put("@value", group);
 			}
 			// skip the operator
 			visited.add(node.getChild(1));
@@ -420,6 +439,30 @@
 			}
 		}
 		
+		if (nodeCat.equals("re_query")) {
+			LinkedHashMap<String,Object> reQuery = new LinkedHashMap<String,Object>();
+			reQuery.put("@subtype", "korap:regex");
+			String regex = node.getChild(0).toStringTree(poliqarpParser);
+			reQuery.put("@value", regex);
+			reQuery.put("relation", "=");
+			
+			if (!openNodeCats.get(1).equals("field")) {
+				LinkedHashMap<String,Object> token = new LinkedHashMap<String,Object>();
+				token.put("@type", "korap:token");
+				token.put("@value", reQuery);
+				reQuery.put("type", "korap:term");
+				
+				if (openNodeCats.get(1).equals("query")) {
+					requestMap.put("query", token);
+				} else {
+					ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
+					topSequenceOperands.add(token);
+				}
+			} 
+//			ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(0).get("operands");
+//			topSequenceOperands.add(reQuery);
+		}
+		
 		if (nodeCat.equals("element")) {
 			// Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
 			LinkedHashMap<String, Object> elem;
@@ -594,6 +637,11 @@
 			processNode(child);
 		}
 				
+		// set negField back 
+		if (nodeCat.equals("neg_field") || nodeCat.equals("neg_field_group")) {
+			negField = !negField;
+		}
+		
 		// Stuff that happens when leaving a node (taking items off the stacks)
 		for (int i=0; i<objectsToPop.get(0); i++) {
 			objectStack.pop();
@@ -607,36 +655,6 @@
 			fieldStack.pop();
 		}
 		fieldsToPop.pop();
-		
-//		// Stuff that happens when leaving a node (taking it off the stack)
-//		if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
-//			// exclude whitespaces analysed as empty cq_segments
-//			if (!ignoreCq_segment && !objectStack.isEmpty()) {
-//				objectStack.pop();
-//			}
-//		}
-//		
-//		if (nodeCat.equals("cq_disj_segments")) {
-//			objectStack.pop();
-//		}
-//		
-//		if (nodeCat.equals("cq_segment") || nodeCat.equals("sq_segment")){
-//			tokenStack.pop();
-//			objectStack.pop();
-//		}
-//		
-//		if (nodeCat.equals("conj_field")) {
-//			fieldStack.pop();
-//		}
-//		
-//		if (nodeCat.equals("position") || nodeCat.equals("spanclass")) {
-//			objectStack.pop();
-//		}
-//		
-//		if (nodeCat.equals("shrink")) {
-//			objectStack.pop();
-//		}
-		
 		openNodeCats.pop();
 	}
 
@@ -705,33 +723,45 @@
 		 * For testing
 		 */
 		String[] queries = new String[] {
-//				"[base=foo]|([base=foo][base=bar])*",
-//				"([base=foo]|[base=bar])[base=foobar]",
-//				"[base=foo]([base=bar]|[base=foobar/i])",
-//				"[base=bar|base=foo]",
-//				"[base=bar]",
-//				"[base=foo][base=bar]",
+				"[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815",
+				"([base=foo]|[base=bar])[base=foobar]",
+				"shrink({[base=Mann]})",
+				"shrink({[base=foo]}[orth=bar])",
+				"shrink(1:[base=Der]{1:[base=Mann]})",
+				"[base=foo]|([base=foo][base=bar])*",
+				"([base=foo]|[base=bar])[base=foobar]",
+				"[base=foo]([base=bar]|[base=foobar/i])",
+				"[base=bar|base=foo]",
+				"[base=bar]",
+				"[base=foo][base=bar]",
 				"[(base=bar|base=foo)&orth=wee]",
-//				"[base=foo/i][base=bar]{2,4}",
-//				"foo bar/i",
-//				"{[base=foo]}[orth=bar]",
-//				"{[base=foo]}{[orth=bar]}",
-//				"{1:[base=foo]<np>}",
-//				"{[base=foo]}[orth=bar]",
+				"[base=foo/i][base=bar]{2,4}",
+				"foo bar/i",
+				"{[base=foo]}[orth=bar]",
+				"{[base=foo]}{[orth=bar]}",
+				"{1:[base=foo]<np>}",
+				"{[base=foo]}[orth=bar]",
 				"shrink({[base=foo]})",
 				"shrink({[base=foo]})[orth=bar]",
 				"[orth=bar]shrink({[base=foo]})",
 				"[base=foo]*[base=bar]",
 				"([base=foo][base=bar])*",
 				"shrink(1:[base=Der]{1:[base=Mann]})",
-//				"shrink(1:{[base=Der][base=Mann]})",
-//				"{[base=foo][orth=bar]}",
-//				"[base=der]shrink(1:[base=Der]{1:[base=Mann]})"
-//				"<np>",
+				"[base=foo]<np>",
+				"[base!=foo]",
+				"[base=foo&orth=bar]",
+				"[!(base=foo&orth=bar)]",
+				"'vers{2,3}uch'",
+				"[orth='vers*ch']",
+				"shrink(1:{[base=Der][base=Mann]})",
+				"{[base=foo][orth=bar]}",
+				"[base=der]shrink(1:[base=Der]{1:[base=Mann]})",
+				"<np>",
 //				"startsWith({<sentence>},<np>)",
 //				"startsWith({<sentence>},[base=foo])",
-//				"[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815",
-//				"([base=foo][base=bar])*",
+				"[base=foo]|([base=foo][base=bar])* meta author=Goethe&year=1815",
+				"([base=foo][base=bar])*",
+				"[(base=bar|base=foo)&orth=foobar]",
 				};
 		for (String q : queries) {
 			try {
commit	f143ac9ebabdf74e9b433bc6e5c646abb41f3eb4	[log] [tgz]
author	Joachim Bingel <joa.bingel@gmail.com>	Wed Dec 04 18:52:54 2013 +0000
committer	Joachim Bingel <joa.bingel@gmail.com>	Wed Dec 04 18:52:54 2013 +0000
tree	b463fb96f19edd8b951c0cd4fc8badb1580b61e3
parent	1417e1906a3a7ec5c89cf8024ebc3a7ff2d11591 [diff]