Issue #66: REG: missing #REG-Operator implemented: rebased.
Added new tests.
Moved general purpose methods to StringUtils.java.

Change-Id: I42f12251a73511fff07b48e06f6018ba1e181433
Reviewed-on: https://korap.ids-mannheim.de/gerrit/c/KorAP/Koral/+/7658
Reviewed-by: Nils Diewald <nils@diewald-online.de>
diff --git a/pom.xml b/pom.xml
index 6bdd557..8d22bf5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -177,6 +177,7 @@
 		          	<exclude>**/c2ps_opIN.java</exclude>
 		          	<exclude>**/c2ps_opOV.java</exclude>
 		          	<exclude>**/c2ps_opPROX.java</exclude>
+		          	<exclude>**/c2ps_opREG.java</exclude>
 		          	<exclude>**/c2ps_opWF.java</exclude>
 		          	<exclude>**/c2ps_optCase.java</exclude>
 		          	<exclude>**/.gitignore</exclude>
diff --git a/src/main/antlr/cosmas/c2ps.g b/src/main/antlr/cosmas/c2ps.g
index c264ea6..8908a49 100644
--- a/src/main/antlr/cosmas/c2ps.g
+++ b/src/main/antlr/cosmas/c2ps.g
@@ -1,16 +1,20 @@
- // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
-//												//
-// 	COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)			//
-// 	globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf).				//
-//	17.12.12/FB										//
-//      v-0.6											//
-// TODO:											//
-// - se1: Einsetzen des Default-Operators in den kumulierten AST.				//
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+//												
+// 	COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax)	
+// 	globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf).			
+//	17.12.12/FB										
+//      v-0.6										
+// TODO:											
+// - se1: Einsetzen des Default-Operators in den kumulierten AST.		
+//
+//  v0.7 - 25.07.23/FB
+//    - added: #REG(x)
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 
 grammar c2ps;
 
 options { output=AST; backtrack=true; k=5;}
+// tokens that will appear as node names in the resulting AST:
 tokens  {C2PQ; OPBED; OPTS; OPBEG; OPEND; OPNHIT; OPALL; OPLEM; OPPROX;
 	 ARG1; ARG2; 
 	 OPWF; OPLEM; OPANNOT;
@@ -21,6 +25,7 @@
 	 OPNOT;
 	 OPEXPR1;
 	 OPMORPH; OPELEM;
+	 OPREG;
 	}
 
 @header {package de.ids_mannheim.korap.query.parse.cosmas;}
@@ -76,6 +81,14 @@
 
 OP_OV	:	'#OV' | '#OV(' OP_OV_OPTS? ')' ;
 
+// #REG(abc['"]) or #REG('abc\'s') or #REG("abc\"s"):
+// 3 alternatives: '..' (\' allowed inside), ".." (\" allowed inside), unquoted text up to the first ')'.
+OP_REG	: '#REG(' ' '* '\'' ('\\\''|~'\'')+  '\'' (' ')* ')'	
+			| 
+		  '#REG(' ' '* '"' ('\\"'|~'"')+ '"' (' ')* ')'
+		  	|
+		  '#REG(' ' '* ~('\''|'"'|' ') (~(')'))* ')';
+
 // EAVEXP wird hier eingesetzt für eine beliebige Sequenz von Zeichen bis zu ')'.
 fragment OP_IN_OPTS
 	:	EAVEXPR ;
@@ -241,7 +254,7 @@
 // OP1: Suchoperatoren mit 1 Argument:
 // -----------------------------------
 
-op1	:	opBEG | opEND | opNHIT | opALL | opBED; 
+op1	:	opBEG | opEND | opNHIT | opALL | opBED | opREG; 
 
 // #BED(serchExpr, B).
 // B muss nachträglich in einer lokalen Grammatik überprüft werden.
@@ -259,3 +272,6 @@
 opNHIT	:	( '#NHIT(' | '#INKLUSIVE(' ) searchExpr ')' -> ^(OPNHIT searchExpr) ;
 
 opALL	:	( '#ALL(' | '#EXKLUSIVE(' ) searchExpr ')'  -> ^(OPALL searchExpr) ;
+// #REG(..): the OP_REG token text is converted by c2ps_opREG.encode() into the child node of OPREG.
+opREG	:	OP_REG -> ^(OPREG {c2ps_opREG.encode($OP_REG.text, OPREG)}) ;
+
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
index fb9df4e..35f6437 100644
--- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java
@@ -17,7 +17,8 @@
         c2ps_opBEDParser.opBEDOpts_return c2PQReturn = null;
 
         /*
-        System.out.println("check opBED: " + index + ": " + input);
+        System.out.format("opBED: check: input='%s', index=%d.\n", input, index); 
+        System.out.format("opBED: tokens ='%s'.\n", tokens.toString());
         System.out.flush();
         */
 
@@ -68,7 +69,7 @@
 
 
     public static void main (String args[]) throws Exception {
-        String[] input = { ",sa,se,-ta,-te/pa,-pe)", ",sa)", ",/pa,-pe)" };
+        String[] input = { ",sa,se,-ta,-te/pa,-pe)", ",sa)", ",/pa,-pe)"};
         Tree tree;
 
         for (int i = 0; i < input.length; i++) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java
new file mode 100644
index 0000000..a798647
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java
@@ -0,0 +1,235 @@
+package de.ids_mannheim.korap.query.parse.cosmas;
+
+import org.antlr.runtime.*;
+import org.antlr.runtime.tree.*;
+
+import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener;
+import de.ids_mannheim.korap.util.StringUtils;
+
+/*
+ * 1. transforms and encodes a COSMAS II regular expression operator #REG(regexpr)
+ *    into an AST node -> encode().
+ * 2. the tree is transformed into the corresponding koral:token/koral:term, like:
+ *    e.g. #REG(abc[']?s) ->
+ *     {
+ *      "@type": "koral:term",
+ *      "match": "match:eq",
+ *      "type" : "type:regex",
+ *      "key"  : "abc[']?s",
+ *      "layer": "orth"
+ *     }...
+ *
+ * - see doc: http://korap.github.io/Koral/
+ * - generation of koral:term -> processOPREG().
+ * 06.09.23/FB
+ */
+
+public class c2ps_opREG
+
+{
+	private static boolean DEBUG = false;
+
+	/* 
+	 * encode():
+	 * 
+	 * converts the text of an OP_REG token into one AST leaf node holding the bare regular expression.
+	 * input = e.g. "#REG('abc(d|e)*')" -> returned node text = abc(d|e)*, i.e. AST = (OPREG abc(d|e)*).
+	 * - blanks around the expression, outside and inside of the quotes, are removed.
+	 * - the escape char >>\<< in front of >>'<<, >>"<< or >>\<< is removed.
+	 * - if input has not the form #REG(...), the returned node contains input unchanged.
+	 * 
+	 * input:     complete token text, e.g. >>#REG( 'abc' )<<; 'REG' may be written in any case.
+	 * tokenType: token type of the returned node.
+	 * 
+	 * Returned String: no enclosing "..." needed, so no escaping of " nor \ needed.
+	 * 06.09.23/FB
+	 */
+	public static Tree encode (String input, int tokenType) 
+	    
+	{
+    if( DEBUG )
+    	{
+    	System.out.printf("opREG.encode: input = >>%s<<, token type=%d.\n", input, tokenType); 
+    	System.out.flush();
+    	}
+    
+    // the length is checked first: substring(0, 5) throws on an input shorter than 5 chars.
+    if( input.length() < 6 ||
+    	input.substring(0, 5).compareToIgnoreCase("#REG(") != 0 || input.charAt(input.length()-1) != ')' )
+    	{
+    	// error: '#REG(' and ')' not found: return input unchanged.
+        if( DEBUG ) System.out.printf("opREG.encode: unexpected input = >>%s<<: nothing encoded!\n", input);
+    	return new CommonTree(new CommonToken(tokenType, input));
+    	}
+        
+
+    // cut off the leading '#REG(' and the closing ')':
+    StringBuffer sb = new StringBuffer(input.substring(5));
+    sb.deleteCharAt(sb.length()-1);
+        
+	// #REG("a"), #REG(a), #REG('a') -> >>a<<.
+    // a. remove blanks around ".." and '..',
+    //    e.g. #REG( ' abc ' ) -> #REG(' abc ').
+    
+    StringUtils.removeBlanksAtBothSides(sb);
+    
+	// sb is empty for #REG() or #REG(   ): the length checks keep charAt() inside of sb.
+	if( sb.length() > 0 && (sb.charAt(0) == '\'' || sb.charAt(0) == '"') )
+		{
+		// remove pairwise at both ends.
+		sb.deleteCharAt(0);
+		if( sb.length() > 0 && (sb.charAt(sb.length()-1) == '\'' || sb.charAt(sb.length()-1) == '"') )
+			sb.deleteCharAt(sb.length()-1);
+		}
+    
+	// b. remove blanks inside '..' or "..",
+    //    E.g. #REG(' abc ') -> #REG('abc'):
+	
+	StringUtils.removeBlanksAtBothSides(sb);
+    
+	/* unescape >>'<<, >>"<< and >>\<<.
+	 * e.g. #REG('that\'s') -> "that\'s" -> >>that's<<.
+	 * After deleteCharAt(i) the escaped char sits at position i and is skipped by i++,
+	 * so an escaped backslash >>\\<< yields one >>\<< which does not escape what follows.
+	 * The loop ends at length()-1: a >>\<< at the very end has no char to escape.
+	 */
+
+	for(int i=0; i<sb.length()-1; i++)
+		{
+		if( sb.charAt(i) == '\\' && 
+			(sb.charAt(i+1) == '\'' || sb.charAt(i+1) == '"' || sb.charAt(i+1) == '\\' ))
+			sb.deleteCharAt(i);
+		}
+	
+	if( DEBUG ) 
+    	System.out.printf("opREG.encode: encoded = >>%s<<.\n", sb.toString());
+	        
+    return new CommonTree(new CommonToken(tokenType, sb.toString()));
+     
+	} // encode
+
+	/*
+	 * printTokens:
+	 * - lexes query with a lexer and a token stream of its own and prints the number of
+	 *   tokens and the text of every token to stdout.
+	 * - a separate CommonTokenStream is built here, because tokens.fill() will consume all tokens.
+	 * - lexer errors are reported to errorListener.
+	 * - mainly for debugging; the printed lines are prefixed with "opREG.check:".
+	 * 14.09.23/FB
+	 * 
+	 */
+	
+	private static void printTokens(String query, Antlr3DescriptiveErrorListener errorListener)
+	
+		{
+	    ANTLRStringStream 
+	    	ss = new ANTLRStringStream(query);
+	    c2psLexer 
+	    	lex = new c2psLexer(ss);
+	    org.antlr.runtime.CommonTokenStream 
+	    	tokens = new org.antlr.runtime.CommonTokenStream(lex); // v3
+	    
+        lex.setErrorReporter(errorListener);
+
+	    // get all tokens from lexer:
+		tokens.fill();
+	    
+		System.out.printf("opREG.check: no. of tokens = %d.\n",  tokens.size()); 
+	    for(int i=0; i<tokens.size(); i++)
+	        	System.out.printf("opREG.check: token[%2d] = %s.\n",  i, tokens.get(i).getText()); 
+	    
+		} // printTokens
+	
+		/* check:
+		 * - parses query with c2psLexer and c2psParser (rule c2ps_query) and returns the AST.
+		 * - returns null if a RecognitionException has been caught; index is not used.
+		 * - if DEBUG is set, printTokens() lists the tokens using a token stream of its own.
+		 */
+	
+	   public static Tree check (String query, int index) 
+	   
+	   {
+	        ANTLRStringStream 
+	        	ss = new ANTLRStringStream(query);
+	        c2psLexer 
+	        	lex = new c2psLexer(ss);
+	        org.antlr.runtime.CommonTokenStream 
+	        	tokens = new org.antlr.runtime.CommonTokenStream(lex); // v3
+	        c2psParser 
+	        	g = new c2psParser(tokens);
+	        Tree 
+	        	tree = null;
+           Antlr3DescriptiveErrorListener errorListener =
+                   new Antlr3DescriptiveErrorListener(query);
+
+           // Use custom error reporters for lexer and parser (the listener is also handed to
+           // printTokens()), or the program will break on broken input, e.g. >>#REG(\" a"s\")<<.
+           lex.setErrorReporter(errorListener);
+           ((c2psParser) g).setErrorReporter(errorListener);
+
+           if( DEBUG )
+		       {
+		        //System.out.format("opREG.check: input='%s', index=%d.\n", query, index); 
+		        printTokens(query, errorListener);
+		        System.out.flush();
+		       }
+
+
+           try {
+               c2psParser.c2ps_query_return 
+               		c2Return = ((c2psParser) g).c2ps_query(); // instead of t().
+               
+               // fetch the AST tree:
+               tree = (Tree) c2Return.getTree();
+               //if (DEBUG) 
+               // 	System.out.printf("opREG.check: tree = '%s'.\n", tree.toStringTree());
+           	}
+           catch (RecognitionException e) {
+               System.err.printf("c2po_opREG.check: Recognition Exception!\n");
+           	}
+
+	     return tree; // still null if the RecognitionException above has been caught.
+	    } // check
+	
+
+	/** 
+	 * main: parses a list of sample #REG() queries with check() and prints the resulting trees.
+	 */
+	   
+    public static void main (String args[]) throws Exception 
+    
+    {
+    	String input[] = {	"#REG(abc)", 
+    						"#REG(def's)", 
+    						"#REG(  def's  )", 		// all blanks should be removed.
+    						"#REG( ' def\\'s ' )", 	// same
+    						"#REG( \" def's \" )", // same
+    						"#REG(abc[\"]ef)", 
+    						"#REG('abc')", 			// properly quoted.
+    						"#REG('abc\')",			// Java reads \' as ': user input = #REG('abc'), same as the entry before.
+     						"#REG('abc\'')",			// Java reads \' as ': user input = #REG('abc'').
+     					   	"#REG('abc\\')",		// User input = #REG('abc\').
+    						"#REG((a|b))",			// broken input, should use ".." or '..'.
+    						"#REG('(a|b)')",		// OK.
+    						"#REG(\"(a|b)\")",		// OK.
+    						"#REG(^[A-Z]+abc[\']*ung$)",
+    						"#REG('ab(cd|ef)*')", 
+    						"#REG('abc(def|g)*[)(]')",
+    						"#REG(\"abc(def|g)*[)(]\")",
+							"#REG('abc[\"]')",		// User input = #REG('abc["]') : OK, needs escape => #REG("...\"...")
+							"#REG(\"abc[\"]\")",	// User input = #REG("abc["]") : broken because of 2nd " -> syntax error.
+							"#REG(\"abc[\\\"]\")",	// User input = #REG("abc[\"]"): OK, already escaped by user => #REG("...\"...")
+							"#REG(\"abc[\\\\\"]\")"	// User input = #REG("abc[\\"]") : broken. with escaped "    => #REG("...\"...")
+							};
+    	Tree tree;
+   
+    	for (int i = 0; i < input.length; i++) 
+        	{
+            System.out.printf("c2ps_opREG: Parsing input %02d: >>%s<<\n", i, input[i]);
+            tree = check(input[i], 0);
+            System.out.printf("c2ps_opREG: tree %02d: >>%s<<.\n\n", i, tree == null ? "null" : tree.toStringTree());
+            }
+
+    	// check() returns null after a RecognitionException, hence the null test in the loop above.
+    } // main
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
index 69a6293..8bbfa35 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
@@ -15,6 +15,7 @@
 import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator;
 import de.ids_mannheim.korap.query.serialize.util.ResourceMapper;
 import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
+import de.ids_mannheim.korap.util.StringUtils;
 
 import org.antlr.runtime.ANTLRStringStream;
 import org.antlr.runtime.RecognitionException;
@@ -127,7 +128,6 @@
     public static Pattern wildcardPlusPattern = Pattern.compile("([+])");
     public static Pattern wildcardQuestionPattern = Pattern.compile("([?])");
 
-
     /**
      * @param tree
      *            The syntax tree as returned by ANTLR
@@ -142,6 +142,7 @@
         process(query);
         if (DEBUG) { 
             log.debug(">>> " + requestMap.get("query") + " <<<");
+        System.out.printf("Cosmas2QueryProcessor: >>%s<<.\n",  requestMap.get("query"));
         }
     }
 
@@ -151,14 +152,19 @@
         Tree tree = null;
         tree = parseCosmasQuery(query);
         if (DEBUG) { 
+        	System.out.printf("\nProcessing COSMAS II query: %s.\n\n", query);
             log.debug("Processing CosmasII query: " + query);
         }
-        if (tree != null) {
-            if (DEBUG) { 
-                log.debug("ANTLR parse tree: " + tree.toStringTree());
-            }
+        if (tree != null) 
+        	{
+            
+                if (DEBUG) {
+            	log.debug("ANTLR parse tree: " + tree.toStringTree());
+                System.out.printf("\nANTLR parse tree: %s.\n\n",  tree.toStringTree());
+                }
+
             processNode(tree);
-        }
+        	}
     }
 
 
@@ -278,6 +284,11 @@
         if (nodeCat.equals("OPBED")) {
             processOPBED(node);
         }
+        
+        if (nodeCat.equals("OPREG")) {
+            processOPREG(node);
+        }
+        
         objectsToPop.push(stackedObjects);
         toWrapsToPop.push(stackedToWrap);
 
@@ -444,6 +455,88 @@
         }
     }
 
+        /* processOPREG:
+         * 
+         * - input Node structure is: (OPREG regexpr).
+		 * - transforms tree into the corresponding Koral:token/Koral:term, like:
+		 *    e.g. #REG(abc[']?s) ->
+		 *     {
+		 *      "@type": "koral:term",
+		 *      "match": "match:eq",   // optional
+		 *      "type" : "type:regex",
+		 *      "key"  : "abc[']?s",
+		 *      "layer": "orth"
+		 *     }.
+		 *
+		 * - steps:
+		 *   1. a koral:token is created, pushed onto objectStack and counted in stackedObjects.
+		 *   2. a koral:term with key, layer, type and match is put into the token as "wrap".
+		 *   3. the token is handed to putIntoSuperObject().
+		 * - a node without any child is ignored.
+		 *
+		 * - see doc: http://korap.github.io/Koral/
+		 * 
+		 * 06.09.23/FB
+		 */
+    	
+    private void processOPREG (Tree node) 
+    
+    {
+        // empty case (is that possible?):
+        // checked before the child is read, as the debug output below would fail on a
+        // missing child (ANTLR's BaseTree.getChild() delivers null in that case).
+        if( node.getChildCount() < 1 )
+        	return;
+        
+        Tree
+        	nodeChild = node.getChild(0);
+        
+        if( DEBUG )
+        	{
+            System.out.printf("Debug: processOPREG: child: >>%s<< cat=%s type=%d.\n",
+            		nodeChild.getText(), getNodeCat(node), nodeChild.getType());
+            }
+        
+        // see processOPWF_OPWF_OPLEM
+        // for how to insert regexpr into Koral JSON-LD
+        
+        // 1. the koral:token is registered on the object stack:
+        Map<String, Object> 
+        	token = KoralObjectGenerator.makeToken();
+        
+        objectStack.push(token);
+        stackedObjects++;
+        
+        // 2. the koral:term is wrapped by the token:
+        Map<String, Object> 
+        	fieldMap = KoralObjectGenerator.makeTerm();
+        
+        token.put("wrap", fieldMap);
+        
+        // make category-specific fieldMap entry:
+        
+        /* The text of the child is taken as it is: enclosing quotes and the escape chars
+         * in front of >>'<<, >>"<< and >>\<< have already been removed by c2ps_opREG.encode(),
+         * so nothing is replaced or deleted here (formerly done with String.replaceAll() and
+         * StringUtils.replaceDoubleQuotes()).
+         */
+        String
+        	value = nodeChild.toStringTree();
+        
+        // debug output of the resulting key:
+        if( DEBUG )
+        	System.out.printf("Debug: processOPREG: key: >>%s<<.\n", value);
+        
+        fieldMap.put("key",   value);
+        fieldMap.put("layer", "orth");
+        fieldMap.put("type",  "type:regex");
+        fieldMap.put("match", "match:eq");
+        
+        // 3. decide where to put (objPos=1, not clear why, but it works only like that - 20.09.23/FB):
+        putIntoSuperObject(token,1); 
+        
+    } // processOPREG
+
 
     private void processOPNHIT (Tree node) {
         Integer[] classRef = new Integer[] { classCounter + 128 + 1,
@@ -1511,19 +1604,40 @@
 
 
     @SuppressWarnings("unchecked")
-    private void putIntoSuperObject (Map<String, Object> object,
-            int objStackPosition) {
-        if (objectStack.size() > objStackPosition) {
+    private void putIntoSuperObject (Map<String, Object> object, int objStackPosition) 
+    
+    	{
+    	if( DEBUG )
+	    	{
+	    	System.out.printf("Debug: putIntosuperObject(<>,int): objectStack.size=%d objStackPos=%d object=%s.\n", 
+	    				objectStack.size(), objStackPosition, object == null ? "null" : "not null");
+	    
+	    	if( objectStack != null && objectStack.size() > 0 )
+	    		System.out.printf("Debug: putIntosuperObject: objectStack = %s.\n",  objectStack.toString());
+	    	
+	    	if( invertedOperandsLists != null )
+	    		System.out.printf("Debug: putIntosuperObject: invertedOperandsLists: [%s].\n", invertedOperandsLists.toString());
+	    	}
+
+
+    	if (objectStack.size() > objStackPosition) 
+        	{
             ArrayList<Object> topObjectOperands =
-                    (ArrayList<Object>) objectStack.get(objStackPosition)
-                            .get("operands");
-            if (!invertedOperandsLists.contains(topObjectOperands)) {
+                    (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
+            
+            if( DEBUG )
+            	System.out.printf("Debug: putIntosuperObject: topObjectOperands = [%s].\n", topObjectOperands == null ? "null" : "not null");
+            
+            objectStack.get(objStackPosition);
+            
+            if (!invertedOperandsLists.contains(topObjectOperands)) 
+            	{
                 topObjectOperands.add(object);
-            }
+            	}
             else {
                 topObjectOperands.add(0, object);
-            }
-        }
+            	}
+        	}
         else {
             requestMap.put("query", object);
         }
@@ -1618,7 +1732,8 @@
 
 
     private Tree parseCosmasQuery (String query) {
-        query = rewritePositionQuery(query);
+        
+    	query = rewritePositionQuery(query);
         Tree tree = null;
         Antlr3DescriptiveErrorListener errorListener =
                 new Antlr3DescriptiveErrorListener(query);
@@ -1627,16 +1742,23 @@
             c2psLexer lex = new c2psLexer(ss);
             org.antlr.runtime.CommonTokenStream tokens =
                     new org.antlr.runtime.CommonTokenStream(lex); // v3
+            
+           // System.out.printf("parseCosmasQuery: tokens = %d\n",  tokens.size());
+           // System.out.printf("parseCosmasQuery: tokens = %s\n",  tokens.toString());
+           
             parser = new c2psParser(tokens);
+           
             // Use custom error reporters
             lex.setErrorReporter(errorListener);
             ((c2psParser) parser).setErrorReporter(errorListener);
+            
             c2psParser.c2ps_query_return c2Return =
                     ((c2psParser) parser).c2ps_query(); // statt t().
+
             // AST Tree anzeigen:
             tree = (Tree) c2Return.getTree();
             if (DEBUG) log.debug(tree.toStringTree());
-        }
+            }
         catch (RecognitionException e) {
             log.error(
                     "Could not parse query. Please make sure it is well-formed.");
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
index 8294dca..94bf15d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QuerySerializer.java
@@ -73,7 +73,9 @@
     private List<Object> errors;
     private List<Object> warnings;
     private List<Object> messages;
-
+    
+    private boolean DEBUG = false;
+    
     public QuerySerializer () {
         this.errors = new ArrayList<>();
         this.warnings = new ArrayList<>();
@@ -102,6 +104,8 @@
         int i = 0;
         String[] queries = null;
         String ql = "poliqarpplus";
+        boolean bDebug = true;
+
         if (args.length < 2) {
             System.err
                     .println("Usage: QuerySerializer \"query\" queryLanguage");
@@ -114,7 +118,9 @@
         for (String q : queries) {
             i++;
             try {
-                jg.run(q, ql);
+		if( bDebug ) System.out.printf("QuerySerialize: query = >>%s<< lang = %s.\n", q, ql);
+            	
+		jg.run(q, ql);
                 System.out.println();
             }
             catch (NullPointerException npe) {
@@ -140,6 +146,9 @@
      * @throws IOException
      */
     public void run (String query, String queryLanguage) throws IOException {
+
+	ast.verbose = DEBUG ? true : false; // debugging: 01.09.23/FB
+
         if (queryLanguage.equalsIgnoreCase("poliqarp")) {
             ast = new PoliqarpPlusQueryProcessor(query);
         }
diff --git a/src/main/java/de/ids_mannheim/korap/util/StringUtils.java b/src/main/java/de/ids_mannheim/korap/util/StringUtils.java
new file mode 100644
index 0000000..29410d1
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/util/StringUtils.java
@@ -0,0 +1,157 @@
+package de.ids_mannheim.korap.util;
+
+/* General-purpose String manipulation functions, moved here
+ * from de.ids_mannheim.korap.query.parse.cosmas.c2ps_opREG and Cosmas2QueryProcessor.
+ * 24.10.23/FB
+ */
+
+public final class StringUtils {
+
+	private static final boolean DEBUG = false;
+	
+	/**
+	 * replaceIfNotEscaped:
+	 * - kind of ad hoc alternative to String.replaceAll().
+	 * - deletes every occurrence of >>"<< in buf that is not directly preceded by >>\<<;
+	 *   an escaped >>\"<< is kept unchanged, including its >>\<<.
+	 * Notes:
+	 * - first intention: substitute String.replaceAll() in processOPREG(), where
+	 *   replaceAll() cannot be used, as the escaped sequences have to be kept.
+	 * Returns the resulting string; 25.09.23/FB
+	 */
+	   
+	public static String replaceIfNotEscaped(String buf)
+	
+	{
+	StringBuffer
+		sb = new StringBuffer(buf);
+	
+	for(int i=0; i<sb.length(); i++)
+		{
+		// a deleted >>"<< shifts the rest to the left, hence i-- to look at position i again.
+		if( sb.codePointAt(i) == '"' && (i==0 || sb.codePointBefore(i) != '\\') )
+			{
+			sb.deleteCharAt(i);
+			i--;
+			}
+		}
+	
+	return sb.toString();
+	
+	} // replaceIfNotEscaped
+
+	
+    /**
+	 * replaceDoubleQuotes:
+	 * - kind of ad hoc enhanced replacement function for >>"<< for #REG(expr)
+	 *   instead of String.replaceAll().
+	 * - deletes every occurrence of >>"<< in buf that is not escaped by >>\<<.
+	 * - If the >>"<< is escaped, the escape char is removed: >>\"<< -> >>"<<.
+	 * Notes:
+	 * - the converted string is intended to be grepped.
+	 * E.g.:  
+	 * - >>"\"Abend\"-Ticket"<< -> >>"Abend"-Ticket<<.
+	 * Returns the converted string.
+	 * 26.09.23/FB
+	 */
+	   
+	public static String replaceDoubleQuotes(String buf)
+	
+	{
+	StringBuffer
+		sb = new StringBuffer(buf);
+	
+	if( DEBUG ) System.out.printf("replaceDoubleQuotes:  input: >>%s<<.\n", buf);
+	
+	for(int i=0; i<sb.length(); i++)
+		{
+		// after a deleted >>\<< the escaped >>"<< sits at position i and is skipped by i++.
+		if( sb.codePointAt(i) == '\\' )
+			{
+			if( i+1 < sb.length() ) 
+				{
+				if( sb.codePointAt(i+1) == '"') // >>\"<< -> >>"<<.
+					sb.deleteCharAt(i);
+				else if( sb.codePointAt(i+1) == '\\' ) // >>\\<< unchanged.
+					i++; // keep >>\\<< unchanged.
+				}
+			}
+		else if( sb.codePointAt(i) == '"' )
+			{
+			sb.deleteCharAt(i); // unescaped >>"<< is removed.
+			i--;
+			}
+		}
+	
+	if( DEBUG ) System.out.printf("replaceDoubleQuotes: output: >>%s<<.\n", sb.toString());
+	
+	return sb.toString();
+	
+	} // replaceDoubleQuotes
+	
+	/* encode2DoubleQuoted:
+	 * transforms an unquoted string into a double quoted string
+	 * and escapes >>"<< and >>\<<.
+	 * E.g. >>.."..<<  -> >>"..\".."<<.
+	 * E.g. >>..\..<<  -> >>"..\\.."<<.
+	 * E.g. >>..\"..<< -> >>"..\\\".."<<, etc.
+	 * 
+	 * escaping >>"<< and >>\<<, because they will be
+	 * enclosed in >>"..."<<.
+	 * >>"<<   -> >>\"<<
+	 * >>\<<   -> >>\\<<
+	 * 
+	 * 28.09.23/FB
+	 * 
+	 * Every char of sb is visited, the last one included: a trailing >>"<< or >>\<<
+	 * left unescaped would clash with the closing >>"<< appended at the end.
+	 * sb is modified in place.
+	 */
+	
+	public static void encode2DoubleQuoted(StringBuffer sb)
+	
+	{
+	if( DEBUG ) System.out.printf("encode2DoubleQuoted:  input = >>%s<<.\n", sb.toString());
+	
+	for(int i=0; i<sb.length(); i++) // sb grows by every insert(); i++ below skips the escaped char.
+    	{	
+		if( sb.charAt(i) == '\\' )
+    		{
+			sb.insert(i,  '\\');
+			i++;
+			}	
+    	else if( sb.charAt(i) == '"')
+        	{ 
+        	sb.insert(i, '\\');	
+        	i++; 
+        	}
+    	}
+
+	// enclose reg. expr. with "..." before returning:
+	sb.insert(0, '"');
+	sb.append('"');
+	
+	if( DEBUG ) System.out.printf("encode2DoubleQuoted: output = >>%s<<.\n", sb.toString());
+	}  // encode2DoubleQuoted
+	
+	/*
+	 * removeBlanksAtBothSides: deletes in place all leading and trailing
+	 * blanks (' ' only, no other whitespace) of sb. 28.09.23/FB
+	 */
+	
+	public static void removeBlanksAtBothSides(StringBuffer sb)
+	
+	{
+	int len;
+		
+    // remove leading blanks: >>  abc  << -> >>abc  <<:
+	while( sb.length() > 0 && sb.charAt(0) == ' ')
+		sb.deleteCharAt(0);
+	
+	// remove trailing blanks: >>abc  << -> >>abc<<:
+	while( (len=sb.length()) > 0 && sb.charAt(len-1) == ' ' )
+		sb.deleteCharAt(len-1);
+	
+	} // removeBlanksAtBothSides
+	
+}
diff --git a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
index 0722c9b..759810f 100644
--- a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java
@@ -14,12 +14,15 @@
 
 import static org.junit.Assert.*;
 
+import static de.ids_mannheim.korap.query.parse.cosmas.c2ps_opREG.*;
+import de.ids_mannheim.korap.util.StringUtils;
 /**
  * Tests for JSON-LD serialization of Cosmas II queries.
  * 
  * @author Joachim Bingel (bingel@ids-mannheim.de)
  * @author Nils Diewald
- * @version 1.1
+ * @author Franck Bodmer
+ * @version 1.2 - 21.09.23
  */
 public class Cosmas2QueryProcessorTest {
 
@@ -1702,4 +1705,224 @@
         assertEquals("s", res.at("/query/distances/0/key").asText());
         assertEquals("operation:sequence", res.at("/query/operation").asText());
     }
+    
+    /* Testing #REG(expr), #REG('expr') and #REG("expr").
+     * 21.09.23/FB
+     */
+     
+    @Test
+    public void testREG () throws JsonProcessingException, IOException {
+        // Every case sets a query for "cosmas2", parses qs.toJSON() and inspects the serialized term.
+    	boolean debug = false; // set to true to print each query and the key it produced.
+        // Unquoted argument; this first case also checks the @type and match attributes.
+        query = "#REG(^aber$)";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("koral:token", res.at("/query/@type").asText());
+        assertEquals("koral:term",  res.at("/query/wrap/@type").asText());
+        assertEquals("^aber$",      res.at("/query/wrap/key").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("match:eq",    res.at("/query/wrap/match").asText());
+        // Single-quoted argument with an escaped quote inside: the query string is #REG('été\'').
+        query = "#REG('été\\'')";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("été'"	,       res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Java reads \' as a plain ', so this query string is #REG('été' ) with a blank after the closing quote.
+        query = "#REG('été\' )";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("été"	,       res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Single-quoted argument ending in a backslash: the query string is #REG('été\'); the backslash stays in the key.
+        query = "#REG('été\\')";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("été\\",       res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Unquoted argument containing an apostrophe.
+        query = "#REG(l'été)";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("l'été",       res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Unquoted argument with an escaped apostrophe: #REG(l\'été); the key comes back without the backslash.
+        query = "#REG(l\\'été)";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("l'été",       res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Double-quoted argument containing an apostrophe.
+        query = "#REG(\"l'été\")";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("l'été",       res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Double-quoted argument with an escaped apostrophe: #REG("l\'été").
+        query = "#REG(\"l\\'été\")";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("l'été",       res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Single-quoted argument with an escaped quote followed by regex metacharacters.
+        query = "#REG('l\\'été.*')";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("l'été.*",     res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Escaped double quotes inside a single-quoted argument.
+        query = "#REG('\\\"été\\\"$')"; // means user input is #REG('\"été\"$').
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("\"été\"$",    res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+
+        // checks the >>"<<: escaped double quotes in an unquoted argument.
+        query = "#REG(\\\"Abend\\\"-Ticket)"; // means user input = #REG(\"Abend\"-Ticket).
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // The same escaped double quotes, now inside a single-quoted argument.
+        query = "#REG('\\\"Abend\\\"-Ticket')"; // means user input = #REG('\"Abend\"-Ticket').
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Unescaped double quotes inside a single-quoted argument.
+        query = "#REG('\"Abend\"-Ticket')"; // means user input = #REG('"Abend"-Ticket').
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText()); // NOTE(review): presumably the key is escaped internally once it is wrapped in "..."; the JSON value read here is unescaped - confirm.
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Escaped double quotes inside a double-quoted argument.
+        query = "#REG(\"\\\"Abend\\\"-Ticket\")"; // means user input = #REG("\"Abend\"-Ticket") -> key: >>"Abend"-Ticket<<.
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n",  query, res.at("/query/wrap/key").asText());
+        assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        //
+        // Regex metacharacters in a single-quoted argument come back unchanged in the key.
+        query = "#REG('^(a|b)?+*$')";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        assertEquals("^(a|b)?+*$",     res.at("/query/wrap/key").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        // Brackets and parentheses inside a double-quoted argument.
+        query = "#REG(\"[A-Z()]\")";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        assertEquals("[A-Z()]",     res.at("/query/wrap/key").asText());
+        assertEquals("orth",        res.at("/query/wrap/layer").asText());
+        assertEquals("type:regex",  res.at("/query/wrap/type").asText());
+        // Two #REG operands joined by /s0: each term is read from operands/<n>/operands/0/wrap.
+        query = "#REG(^klein.*) /s0 #REG(A.*ung)";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+        
+        //System.out.printf("Debug: res: pretty: %s.\n",  res.toPrettyString());
+        
+        assertEquals("^klein.*",    res.at("/query/operands/0/operands/0/wrap/key").asText());
+        assertEquals("orth",        res.at("/query/operands/0/operands/0/wrap/layer").asText());
+        assertEquals("type:regex",  res.at("/query/operands/0/operands/0/wrap/type").asText());
+        
+        assertEquals("A.*ung",      res.at("/query/operands/1/operands/0/wrap/key").asText());
+        assertEquals("orth",        res.at("/query/operands/1/operands/0/wrap/layer").asText());
+        assertEquals("type:regex",  res.at("/query/operands/1/operands/0/wrap/type").asText());
+        // A blank argument or an empty quoted string must yield a result containing "Failing to parse".
+        query = "#REG( ) ";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+ 
+        assertTrue(res.toString().contains("Failing to parse"));
+        
+        query = "#REG('' ) ";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+ 
+        assertTrue(res.toString().contains("Failing to parse"));
+
+        query = "#REG(\"\") ";
+        qs.setQuery(query, "cosmas2");
+        res = mapper.readTree(qs.toJSON());
+ 
+        assertTrue(res.toString().contains("Failing to parse"));
+
+    }
+
+    @Test
+    public void testREGencode2DoubleQuoted () {
+        StringBuffer sb = new StringBuffer("..\"..");  // an inner >>"<< gets a leading backslash.
+        StringUtils.encode2DoubleQuoted(sb);
+        assertEquals("\"..\\\"..\"",sb.toString());
+        // an inner >>\<< is doubled:
+        sb = new StringBuffer("..\\..");
+        StringUtils.encode2DoubleQuoted(sb);
+        assertEquals("\"..\\\\..\"", sb.toString());
+        // >>\<< directly followed by >>"<<: each of the two chars is escaped on its own:
+        sb = new StringBuffer("..\\\"..");
+        StringUtils.encode2DoubleQuoted(sb);
+        assertEquals("\"..\\\\\\\"..\"", sb.toString());
+    }
+
+    @Test
+    public void testREGremoveBlanksAtBothSides () {
+        StringBuffer bothSides = new StringBuffer("    aabc cjs   ss   "); // inner blanks must survive.
+        StringUtils.removeBlanksAtBothSides(bothSides);
+        assertEquals("aabc cjs   ss", bothSides.toString());
+        // blanks only behind the text:
+        StringBuffer trailingOnly = new StringBuffer("abc   ");
+        StringUtils.removeBlanksAtBothSides(trailingOnly);
+        assertEquals("abc", trailingOnly.toString());
+        // blanks only in front of the text:
+        StringBuffer leadingOnly = new StringBuffer("   abc");
+        StringUtils.removeBlanksAtBothSides(leadingOnly);
+        assertEquals("abc", leadingOnly.toString());
+    }
 }