// blob: f89fd073ef104130ab50d5d3754805bf98e6da99 [file] [log] [blame]
package de.ids_mannheim.korap.index;
import java.util.*;
import java.util.regex.*;
import de.ids_mannheim.korap.index.DocIdentifier;
public class MatchIdentifier extends DocIdentifier {
private int startPos, endPos = 0;
private ArrayList<int[]> pos = new ArrayList<>(8);
Pattern idRegex = Pattern.compile(
"^match-(?:([^!]+?)!)?" +
"([^!]+)-p([0-9]+)-([0-9]+)" +
"((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" +
"(?:c.+?)?$");
Pattern posRegex = Pattern.compile(
"\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
public MatchIdentifier () {};
public MatchIdentifier (String id) {
Matcher matcher = idRegex.matcher(id);
if (matcher.matches()) {
this.setCorpusID(matcher.group(1));
this.setDocID(matcher.group(2));
this.setStartPos(Integer.parseInt(matcher.group(3)));
this.setEndPos(Integer.parseInt(matcher.group(4)));
if (matcher.group(5) != null) {
matcher = posRegex.matcher(matcher.group(5));
while (matcher.find()) {
this.addPos(
Integer.parseInt(matcher.group(2)),
Integer.parseInt(matcher.group(3)),
Integer.parseInt(matcher.group(1))
);
};
};
};
};
public int getStartPos () {
return this.startPos;
};
public void setStartPos (int pos) {
if (pos >= 0)
this.startPos = pos;
};
public int getEndPos () {
return this.endPos;
};
public void setEndPos (int pos) {
if (pos >= 0)
this.endPos = pos;
};
public void addPos(int start, int end, int number) {
if (start >= 0 && end >= 0 && number >= 0)
this.pos.add(new int[]{start, end, number});
};
public ArrayList<int[]> getPos () {
return this.pos;
};
public String toString () {
if (this.docID == null) return null;
StringBuilder sb = new StringBuilder("match-");
// Get prefix string corpus/doc
if (this.corpusID != null) {
sb.append(this.corpusID).append('!');
};
sb.append(this.docID);
sb.append("-p");
sb.append(this.startPos).append('-').append(this.endPos);
// Get Position information
for (int[] i : this.pos) {
sb.append('(').append(i[2]).append(')');
sb.append(i[0]).append('-').append(i[1]);
};
/*
if (this.processed) {
sb.append('c');
for (int[] s : this.span) {
if (s[2] >= 256)
continue;
if (s[2] != -1)
sb.append('(').append(s[2]).append(')');
sb.append(s[0] + this.startOffsetChar);
sb.append('-');
sb.append(s[1] + this.startOffsetChar);
};
};
*/
return sb.toString();
};
};