blob: 6354f822a4ac5a00a950b31c98d61c38a15d1ef0 [file] [log] [blame]
package de.ids_mannheim.korap.index;
import java.util.*;
import java.nio.ByteBuffer;
import java.lang.StringBuffer;
import java.util.regex.*;
import de.ids_mannheim.korap.response.Match;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A single index term together with the token position it was found at
 * and its payload.
 *
 * <p>The raw term string and payload are stored on construction and only
 * decoded by {@link #analyze()}, which derives the type, the annotation
 * parts (foundry, layer, value), the character offsets, the end position
 * and the depth. The getters return the plain field values, i.e. the
 * initial defaults ({@code null}, {@code -1}, {@code 0}) until
 * {@link #analyze()} has run.
 */
public class TermInfo implements Comparable<TermInfo> {

    // Logger
    private static final Logger log = LoggerFactory.getLogger(TermInfo.class);

    // Compile-time constant guarding all trace logging in this class;
    // while it is false the guarded log calls are never executed.
    public static final boolean DEBUG = false;

    // Internal term kinds determined from the term prefix in analyze()
    private static final int KIND_TERM = 0;
    private static final int KIND_POS = 1;
    private static final int KIND_SPAN = 2;
    private static final int KIND_REL = 3;

    // Splits "foundry/layer:value"; foundry and value are optional.
    // Compiled once for all instances instead of once per TermInfo.
    private static final Pattern PREFIX_REGEX = Pattern
            .compile("(?:([^/]+)/)?([^:/]+)(?::(.+?))?");

    private String foundry, layer, value, term, type, annotation;
    // type can be "term", "pos", "span", "relSrc", "relTarget"

    private ByteBuffer payload;
    private boolean analyzed = false;

    private int startChar = -1, endChar = -1, startPos = -1, endPos = -1;

    private byte depth = (byte) 0;


    /**
     * Create a new, not yet analyzed term info.
     *
     * @param term
     *            the raw term string, including its type prefix
     * @param pos
     *            the token position; used as both start and end
     *            position until {@link #analyze()} reads a different
     *            end position from the payload
     * @param payload
     *            the payload belonging to the term
     */
    public TermInfo (String term, int pos, ByteBuffer payload) {
        this.term = term;
        this.startPos = pos;
        this.endPos = pos;
        this.payload = payload;
    }


    /**
     * Decode the term string and the payload. The work is done only
     * once; later calls return immediately.
     *
     * <p>The term prefix selects the type: {@code "<>"} is a
     * {@code "span"}, any other {@code '<'} a {@code "relTarget"},
     * {@code '>'} a {@code "relSrc"}, {@code '_'} a {@code "pos"} and
     * everything else a {@code "term"}.
     *
     * <p>The payload is rewound and then read in this order:
     * <ul>
     * <li>pos: start and end character offset (two ints)</li>
     * <li>span: start and end character offset (two ints), end
     * position (int, decremented by one) and, if bytes remain before
     * the position the buffer had prior to rewinding, the depth
     * (byte)</li>
     * <li>relation: end position (int, decremented by one)</li>
     * <li>term: nothing</li>
     * </ul>
     * The buffer position is left behind the last value read.
     *
     * @return this object
     */
    public TermInfo analyze () {
        if (analyzed)
            return this;

        // Position of the buffer before rewinding; used below as the
        // limit for deciding whether a depth byte is available
        final int lastPos = this.payload.position();
        this.payload.rewind();

        String tterm = this.term;
        int kind = KIND_TERM;

        switch (tterm.charAt(0)) {
            case '<':
                if (tterm.charAt(1) == '>') {
                    // span, e.g. "<>:mate/l:..."
                    this.type = "span";
                    tterm = tterm.substring(3);
                    kind = KIND_SPAN;
                }
                else {
                    // rel-target
                    this.type = "relTarget";
                    tterm = tterm.substring(2);
                    kind = KIND_REL;
                }
                break;

            case '>':
                // rel-src
                this.type = "relSrc";
                tterm = tterm.substring(2);
                kind = KIND_REL;
                break;

            case '_':
                // pos
                this.type = "pos";
                tterm = tterm.substring(1);
                kind = KIND_POS;
                break;

            default:
                // term
                this.type = "term";
        }

        if (kind == KIND_POS) {
            // Positions carry their character offsets in the payload
            this.value = tterm;
            this.startChar = this.payload.getInt();
            this.endChar = this.payload.getInt();
        }
        else {
            // Analyze term value
            if (DEBUG)
                log.trace("Check {} for {}", tterm, PREFIX_REGEX.toString());

            // The matcher is scratch state and therefore kept local
            final Matcher matcher = PREFIX_REGEX.matcher(tterm);
            if (matcher.matches()) {
                this.annotation = tterm;
                final String prefix = matcher.group(1);
                // Terms without an explicit foundry belong to "base"
                this.foundry = (prefix != null) ? prefix : "base";
                this.layer = matcher.group(2);
                this.value = matcher.group(3);
            }
        }

        // for spans
        if (kind == KIND_SPAN) {
            this.startChar = this.payload.getInt();
            this.endChar = this.payload.getInt();
        }

        // for spans and relations
        if (kind == KIND_SPAN || kind == KIND_REL) {
            // Unsure if this is correct
            this.endPos = this.payload.getInt() - 1;
        }

        // Spans may carry a trailing depth byte
        if (kind == KIND_SPAN && this.payload.position() < lastPos) {
            this.depth = this.payload.get();
        }

        // payloads can have different meaning
        analyzed = true;
        return this;
    }


    /**
     * @return the type ("term", "pos", "span", "relSrc" or
     *         "relTarget"), or null before {@link #analyze()} ran
     */
    public String getType () {
        return this.type;
    }


    /**
     * @return the start character offset, or -1 if none was read or set
     */
    public int getStartChar () {
        return this.startChar;
    }


    /**
     * @param pos
     *            the new start character offset
     */
    public void setStartChar (int pos) {
        this.startChar = pos;
    }


    /**
     * @return the end character offset, or -1 if none was read or set
     */
    public int getEndChar () {
        return this.endChar;
    }


    /**
     * @param pos
     *            the new end character offset
     */
    public void setEndChar (int pos) {
        this.endChar = pos;
    }


    /**
     * @return the start position given to the constructor
     */
    public int getStartPos () {
        return this.startPos;
    }


    /**
     * @return the end position; equal to the start position unless
     *         {@link #analyze()} read another one from the payload
     */
    public int getEndPos () {
        return this.endPos;
    }


    /**
     * @return the depth read from a span payload, 0 otherwise
     */
    public byte getDepth () {
        return this.depth;
    }


    /**
     * @return the foundry ("base" if the term names none), or null if
     *         the term was not analyzed or did not match
     */
    public String getFoundry () {
        return this.foundry;
    }


    /**
     * @return the layer, or null if the term was not analyzed or did
     *         not match
     */
    public String getLayer () {
        return this.layer;
    }


    /**
     * @return the value part of the term; may be null
     */
    public String getValue () {
        return this.value;
    }


    /**
     * @return the term without its type prefix, or null if the term
     *         was not analyzed, did not match or is a "pos" term
     */
    public String getAnnotation () {
        return this.annotation;
    }


    /**
     * Analyze the term if necessary and serialize it as
     * {@code <type>foundry/layer[:value][(depth)][startPos-endPos][startChar-endChar]}.
     * The value is omitted when null, the depth when 0.
     */
    @Override
    public String toString () {
        this.analyze();

        final StringBuilder sb = new StringBuilder();
        sb.append('<').append(this.getType()).append('>');
        sb.append(this.getFoundry()).append('/').append(this.getLayer());

        if (this.getValue() != null)
            sb.append(':').append(this.getValue());

        if (this.getDepth() != (byte) 0)
            sb.append('(').append(this.getDepth()).append(')');

        sb.append('[').append(this.getStartPos());
        sb.append('-').append(this.getEndPos()).append(']');
        sb.append('[').append(this.getStartChar());
        sb.append('-').append(this.getEndChar()).append(']');
        return sb.toString();
    }


    /**
     * Order by start character offset (ascending); on equal offsets
     * the object with the greater depth comes first. Both objects are
     * analyzed before they are compared.
     */
    @Override
    public int compareTo (TermInfo obj) {
        this.analyze();
        obj.analyze();

        // TODO: This sorting does not seem to work!
        // although it might only be important for depth stuff.
        if (this.startChar < obj.startChar)
            return -1;
        if (this.startChar > obj.startChar)
            return 1;

        // Same start offset: greater depth sorts first
        if (this.depth < obj.depth)
            return 1;
        if (this.depth > obj.depth)
            return -1;

        return 0;
    }
}