blob: cc3d12ae8d2a3ba5c64d0c22710345a61e0af5eb [file] [log] [blame]
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001package de.ids_mannheim.korap.index;
2
3import java.util.*;
Nils Diewald138e5b92014-01-10 21:15:13 +00004import java.nio.ByteBuffer;
Nils Diewaldcde69082014-01-16 15:46:48 +00005import java.lang.StringBuffer;
Nils Diewald138e5b92014-01-10 21:15:13 +00006import java.util.regex.*;
7import de.ids_mannheim.korap.KorapMatch;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00008
Nils Diewald138e5b92014-01-10 21:15:13 +00009import org.slf4j.Logger;
10import org.slf4j.LoggerFactory;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000011
Nils Diewald138e5b92014-01-10 21:15:13 +000012public class TermInfo implements Comparable<TermInfo> {
13
14 // Logger
15 private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
Nils Diewald82a4b862014-02-20 21:17:41 +000016 // This advices the java compiler to ignore all loggings
17 public static final boolean DEBUG = false;
18
Nils Diewald138e5b92014-01-10 21:15:13 +000019
Nils Diewaldcde69082014-01-16 15:46:48 +000020 private String foundry, layer, value, term, type, annotation;
Nils Diewald138e5b92014-01-10 21:15:13 +000021 // type can be "term", "pos", "span", "rel-src", "rel-target"
22
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000023 private int pos = 0;
Nils Diewald138e5b92014-01-10 21:15:13 +000024 private ByteBuffer payload;
25 private boolean analyzed = false;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000026
Nils Diewald138e5b92014-01-10 21:15:13 +000027 private int startChar = -1,
28 endChar = -1,
29 startPos = -1,
30 endPos = -1;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000031
Nils Diewald138e5b92014-01-10 21:15:13 +000032 private byte depth = (byte) 0;
33
Nils Diewald345bdc02014-01-21 21:48:57 +000034 private Pattern prefixRegex = Pattern.compile("(?:([^/]+)/)?([^:/]+)(?::(.+?))?");
Nils Diewald138e5b92014-01-10 21:15:13 +000035 private Matcher matcher;
36
37 public TermInfo (String term, int pos, ByteBuffer payload) {
38 this.term = term;
39 this.startPos = pos;
Nils Diewaldcde69082014-01-16 15:46:48 +000040 this.endPos = pos;
Nils Diewald138e5b92014-01-10 21:15:13 +000041 this.payload = payload;
42 };
43
44 public TermInfo analyze () {
45 if (analyzed)
46 return this;
47
48 int ttype = 0;
49 String tterm = this.term;
Nils Diewald345bdc02014-01-21 21:48:57 +000050 int lastPos = this.payload.position();
Nils Diewald138e5b92014-01-10 21:15:13 +000051 this.payload.rewind();
52
53 switch (tterm.charAt(0)) {
54 case '<':
55 // "<>:mate/l:..."
56 if (tterm.charAt(1) == '>') {
57 // span
58 this.type = "span";
59 tterm = tterm.substring(3);
60 ttype = 2;
61 }
62 // rel-target
63 else {
64 this.type = "relTarget";
65 tterm = tterm.substring(2);
66 ttype = 3;
67 };
68 break;
69 case '>':
70 // rel-src
71 this.type = "relSrc";
72 tterm = tterm.substring(2);
73 ttype = 3;
74 break;
75
76 case '_':
77 // pos
78 this.type = "pos";
79 ttype = 1;
80 tterm = tterm.substring(1);
81 break;
82 default:
83 // term
84 this.type = "term";
85 };
86
87 // Analyze term value
88 if (ttype != 1) {
Nils Diewald82a4b862014-02-20 21:17:41 +000089 if (DEBUG)
90 log.trace("Check {} for {}", tterm, prefixRegex.toString());
Nils Diewald138e5b92014-01-10 21:15:13 +000091 matcher = prefixRegex.matcher(tterm);
92 if (matcher.matches() && matcher.groupCount() == 3) {
Nils Diewaldcde69082014-01-16 15:46:48 +000093 this.annotation = tterm;
Nils Diewald345bdc02014-01-21 21:48:57 +000094 if (matcher.group(1) != null)
95 this.foundry = matcher.group(1);
96 else
97 this.foundry = "base";
Nils Diewald138e5b92014-01-10 21:15:13 +000098 this.layer = matcher.group(2);
99 this.value = matcher.group(3);
100 };
101 }
102
103 // for positions
104 else {
105 this.value = tterm;
106 this.startChar = this.payload.getInt();
107 this.endChar = this.payload.getInt();
108 };
109
110 // for spans
111 if (ttype == 2) {
112 this.startChar = this.payload.getInt();
113 this.endChar = this.payload.getInt();
114 };
115
116 // for spans and relations
117 if (ttype > 1)
Nils Diewaldcde69082014-01-16 15:46:48 +0000118 // Unsure if this is correct
119 this.endPos = this.payload.getInt() -1;
Nils Diewald138e5b92014-01-10 21:15:13 +0000120
Nils Diewald345bdc02014-01-21 21:48:57 +0000121 if (ttype == 2 && this.payload.position() < lastPos) {
Nils Diewald138e5b92014-01-10 21:15:13 +0000122 this.depth = this.payload.get();
123 };
124
125 // payloads can have different meaning
126 analyzed = true;
127 return this;
128 };
129
130 public String getType () {
131 return this.type;
132 };
133
134 public int getStartChar () {
135 return this.startChar;
136 };
137
138 public void setStartChar (int pos) {
139 this.startChar = pos;
140 };
141
142 public int getEndChar () {
143 return this.endChar;
144 };
145
146 public void setEndChar (int pos) {
147 this.endChar = pos;
148 };
149
150 public int getStartPos () {
151 return this.startPos;
152 };
153
154 public int getEndPos () {
155 return this.endPos;
156 };
157
158 public byte getDepth () {
159 return this.depth;
160 };
161
162 public String getFoundry () {
163 return this.foundry;
164 };
165
166 public String getLayer () {
167 return this.layer;
168 };
169
170 public String getValue () {
171 return this.value;
172 };
173
Nils Diewaldcde69082014-01-16 15:46:48 +0000174 public String getAnnotation () {
175 return this.annotation;
176 };
177
Nils Diewald345bdc02014-01-21 21:48:57 +0000178 public String toString () {
179 this.analyze();
180
181 StringBuffer sb = new StringBuffer();
182 sb.append('<').append(this.getType()).append('>');
183 sb.append(this.getFoundry()).append('/').append(this.getLayer());
184
185 if (this.getValue() != null)
186 sb.append(':').append(this.getValue());
187
188 if (this.getDepth() != (byte) 0)
189 sb.append('(').append(this.getDepth()).append(')');
190
191 sb.append('[').append(this.getStartPos());
192 sb.append('-').append(this.getEndPos()).append(']');
193 sb.append('[').append(this.getStartChar());
194 sb.append('-').append(this.getEndChar()).append(']');
195
196 return sb.toString();
197 };
198
Nils Diewald138e5b92014-01-10 21:15:13 +0000199 @Override
200 public int compareTo (TermInfo obj) {
201 this.analyze();
202 obj.analyze();
203
Nils Diewaldcde69082014-01-16 15:46:48 +0000204 // TODO: This sorting does not seem to work!
205 // although it might only be important for depth stuff.
206
Nils Diewald138e5b92014-01-10 21:15:13 +0000207 if (this.startChar < obj.startChar) {
208 return -1;
209 }
210 else if (this.startChar > obj.startChar) {
211 return 1;
212 }
213 else if (this.depth < obj.depth) {
214 return 1;
215 }
216 else if (this.depth > obj.depth) {
217 return -1;
218 };
219 return 0;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000220 };
221};