| package de.ids_mannheim.korap.query.parse.cosmas; |
| |
| import org.antlr.runtime.*; |
| import org.antlr.runtime.tree.*; |
| import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROXLexer; |
| import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX; |
| import de.ids_mannheim.korap.query.serialize.util.StatusCodes; |
| |
| /* |
| * parses prefixed and suffixed options of a search wordform. |
| * E.g. :fi:Hendrix:sa/-pe. |
| */ |
| |
| public class c2ps_opWF |
| |
| { |
| static final boolean bDebug = false; |
| |
| /* check: |
| * Arguments: |
| * bStrip: true: 'input' contains "wort" -> strip " away -> wort. |
| * false: 'input' contains no " -> nothing to strip. |
| * bLem: true: input contains a Lemma; generates tree ^(OPLEM...). |
| * false: input contains a Wordform; generates tree ^(OPWF...). |
| * input: may be a single Lemma or Wform or a list of Wforms. |
| */ |
| |
| public static Tree check (String input, boolean bStrip, boolean bLem, int pos) |
| { |
| if (bStrip) |
| input = input.substring(1, input.length() - 1); |
| |
| if (bLem && input.charAt(0) == '&') { |
| input = input.substring(1, input.length()); |
| //System.out.println("Lemma: strip '&' -> " + input); |
| } |
| |
| ANTLRStringStream ss = new ANTLRStringStream(input); |
| c2ps_opWFLexer lex = new c2ps_opWFLexer(ss); |
| CommonTokenStream tokens = new CommonTokenStream(lex); |
| c2ps_opWFParser g = new c2ps_opWFParser(tokens); |
| c2ps_opWFParser.searchWFs_return c2PQWFReturn = null; |
| c2ps_opWFParser.searchLEM_return c2PQLEMReturn = null; |
| |
| /* |
| System.out.println("check opWF:" + index + ": " + input); |
| System.out.flush(); |
| */ |
| |
| try { |
| if (bLem) |
| c2PQLEMReturn = g.searchLEM(pos); |
| else |
| c2PQWFReturn = g.searchWFs(pos); |
| } |
| catch (RecognitionException e) { |
| e.printStackTrace(); |
| } |
| |
| // AST Tree anzeigen: |
| Tree tree = bLem ? (Tree)c2PQLEMReturn.getTree() : (Tree)c2PQWFReturn.getTree(); |
| |
| if( bDebug && bLem ) |
| { |
| System.out.printf("c2ps_opWF.check: %s: '%s'.\n", bLem ? "opLEM" : "opWF", |
| tree.toStringTree() ); |
| System.out.flush(); |
| } |
| |
| return tree; |
| } |
| |
| |
| /* Wordform Encoding, e.g. to insert a Wordform into an AST. |
| * a) wf -> "wf". |
| * b) remove escape char before ':': abc\: -> abc:. |
| * Args: |
| * wf : wordform or lemma (expected lemma : "lemma" or "opts&lemma", |
| * the starting '&' has been removed before entering this function). |
| * tokenType : either OPWF or OPLEM. |
| * pos : start position of wf. |
| * Notes: |
| * - &opts&lemma : may contain wildcards as options in the &opts& section only. |
| * reject if wildcards appear in the &lemma section. |
| * Returns a Tree or an ErrorTree. |
| */ |
| public static Tree encode (String wf, int tokenType, int pos) |
| |
| { |
| //System.out.printf("c2ps_opWF.encode: wf='%s' tokenType=%d pos=%d.\n", wf, tokenType, pos); |
| |
| // b) |
| StringBuffer sbWF = new StringBuffer(wf); |
| |
| for (int i = 0; i < sbWF.length()-1; i++) |
| { |
| if (sbWF.charAt(i) == '\\' && sbWF.charAt(i + 1) == ':') |
| sbWF.deleteCharAt(i); |
| } |
| |
| // reject wildcards in lemmata: |
| |
| if( tokenType == c2ps_opWFLexer.OPLEM ) |
| { |
| boolean hasOpts = false; // true if a '&' occurs: e.g. "Fes+C&lemma" |
| boolean hasFound = false; // false for all wildcards found to the left of '&', true in all other cases. |
| |
| for(int i=0; i< sbWF.length(); i++) |
| { |
| if( sbWF.charAt(i) == '&' ) |
| { |
| hasOpts = true; |
| hasFound = false; |
| } |
| else if (sbWF.charAt(i) == '?' || sbWF.charAt(i) == '*' || sbWF.charAt(i) == '+' ) |
| { |
| hasFound = true; |
| } |
| } |
| |
| // error if hasFound==true: |
| if( hasFound ) |
| { |
| if( bDebug ) |
| System.out.printf("c2ps_opWF.encode: Syntax error: '%s' contains wildcards inside lemma expression!\n", wf); |
| return StatusCodes.buildErrorTree(wf, StatusCodes.ERR_LEM_WILDCARDS, pos); |
| } |
| } |
| |
| return new CommonTree(new CommonToken(tokenType, "\"" + sbWF.toString() + "\"")); |
| } |
| |
| /* |
| * main testprogram |
| */ |
| |
| public static void main (String args[]) throws Exception { |
| String[] input = { ":fi:Hendrix:sa", ":FiOlDs:été:sa", "&Gitarre", |
| "&Gitarre:sa/-pe", " \"Institut für \\:Deutsche\\: Sprache\" ", |
| ":Fi:der:-sa Wilde:-se Western:/se" }; |
| Tree tree; |
| boolean bLem; |
| |
| System.out.println("Tests von WF und Lemma-Optionen:\n"); |
| |
| for (int i = 0; i < input.length; i++) { |
| bLem = input[i].charAt(0) == '&' ? true : false; |
| |
| System.out.println(bLem ? "LEM: " : "WF: " + "input: " + input[i]); |
| |
| if (bLem) |
| tree = check(input[i], false, true, 0); // bStrip=false, bLem=true; |
| else |
| tree = check(input[i], false, false, 0); // bStrip=false, bLem=false. |
| |
| System.out.println(bLem ? "LEM: " : "WF: " + "AST : " |
| + tree.toStringTree() + "\n"); |
| } |
| |
| } // main |
| |
| } |