blob: 5400bec13db3fb17544ab1cd211f8d8137695efb [file] [log] [blame]
Joachim Bingel6003b852014-12-18 14:20:55 +00001package de.ids_mannheim.korap.query.parse.cosmas;
2
3import java.io.*;
4import org.antlr.runtime.*;
5import org.antlr.runtime.debug.DebugEventSocketProxy;
6import org.antlr.runtime.tree.*;
7
/*
 * Parses the prefixed and suffixed options of a search word form,
 * e.g. :fi:Hendrix:sa/-pe.
 */
12
13public class c2ps_opWF
14
15{
16 /* Arguments:
17 * bStrip: true: 'input' contains "wort" -> strip " away -> wort.
18 * false: 'input' contains no " -> nothing to strip.
19 * bLem: true: input contains a Lemma; generates tree ^(OPLEM...).
20 * false: input contains a Wordform; generates tree ^(OPWF...).
21 * input: may be a single Lemma or Wform or a list of Wforms.
22 */
23
24 public static Tree check(String input, boolean bStrip, boolean bLem, int index)
25 {
26 if( bStrip )
27 input = input.substring(1, input.length()-1);
28
29 if( bLem && input.charAt(0) == '&' )
30 {
31 input = input.substring(1, input.length());
32 //System.out.println("Lemma: strip '&' -> " + input);
33 }
34
35 ANTLRStringStream
36 ss = new ANTLRStringStream(input);
37 c2ps_opWFLexer
38 lex = new c2ps_opWFLexer(ss);
39 CommonTokenStream tokens =
40 new CommonTokenStream(lex);
41 c2ps_opWFParser
42 g = new c2ps_opWFParser(tokens);
43 c2ps_opWFParser.searchWFs_return
44 c2PQWFReturn = null;
45 c2ps_opWFParser.searchLEM_return
46 c2PQLEMReturn = null;
47
48 /*
49 System.out.println("check opWF:" + index + ": " + input);
50 System.out.flush();
51 */
52
53 try
54 {
55 if( bLem )
56 c2PQLEMReturn = g.searchLEM();
57 else
58 c2PQWFReturn = g.searchWFs();
59 }
60 catch (RecognitionException e)
61 {
62 e.printStackTrace();
63 }
64
65 // AST Tree anzeigen:
66 Tree tree = bLem ? (Tree)c2PQLEMReturn.getTree() : (Tree)c2PQWFReturn.getTree();
67 // System.out.println(bLem? "opLEM: " : "opWF: " + tree.toStringTree() );
68
69 return tree;
70 }
71
72 /* Wordform Encoding, e.g. to insert a Wordform into an AST.
73 * a) wf -> "wf".
74 * b) remove escape char before ':': abc\: -> abc:.
75 * Returns a Tree.
76 */
77 public static Tree encode(String wf, int tokenType)
78
79 {
80 // b)
81 StringBuffer
82 sbWF = new StringBuffer(wf);
83
84 for(int i=0; i<sbWF.length()-1; i++)
85 {
86 if( sbWF.charAt(i) == '\\' && sbWF.charAt(i+1) == ':' )
87 sbWF.deleteCharAt(i);
88 }
89
90 return new CommonTree(new CommonToken(tokenType, "\"" + sbWF.toString() + "\""));
91 }
92
93 /*
94 * main testprogram:
95 */
96
97 public static void main(String args[]) throws Exception
98 {
99 String[]
100 input = {":fi:Hendrix:sa", ":FiOlDs:été:sa", "&Gitarre", "&Gitarre:sa/-pe",
101 " \"Institut für \\:Deutsche\\: Sprache\" ",
102 ":Fi:der:-sa Wilde:-se Western:/se" };
103 Tree
104 tree;
105 boolean
106 bLem;
107
108 System.out.println("Tests von WF und Lemma-Optionen:\n");
109
110 for(int i=0; i<input.length; i++)
111 {
112 bLem = input[i].charAt(0) == '&' ? true : false;
113
114 System.out.println(bLem? "LEM: " : "WF: " + "input: " + input[i]);
115
116 if( bLem )
117 tree = check(input[i], false, true, 0); // bStrip=false, bLem=true;
118 else
119 tree = check(input[i], false, false, 0); // bStrip=false, bLem=false.
120
121 System.out.println(bLem? "LEM: " : "WF: " + "AST : " + tree.toStringTree() + "\n");
122 }
123
124 } // main
125
126}