blob: 7ebf161e51697355352a1eda5a4e6dbf979a8d8e [file] [log] [blame]
Nils Diewaldff0f8742015-02-26 20:42:45 +00001package de.ids_mannheim.korap.response.match;
Nils Diewaldbb33da22015-03-04 16:24:25 +00002
Nils Diewaldff0f8742015-02-26 20:42:45 +00003import java.util.*;
4import java.util.regex.*;
5
6public class MatchIdentifier extends DocIdentifier {
7 private int startPos, endPos = -1;
8
9 private ArrayList<int[]> pos = new ArrayList<>(8);
10
Akron48937e92015-06-26 01:49:02 +020011 // TODO: "contains" is necessary for a compatibility bug in Kustvakt
Akron40550172015-08-04 03:06:12 +020012 Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
13 + "(?:([^!]+?)[!\\.])?" + "([^!]+)-p([0-9]+)-([0-9]+)"
14 + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" + "(?:c.+?)?$");
Nils Diewaldbb33da22015-03-04 16:24:25 +000015 Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
16
Nils Diewaldff0f8742015-02-26 20:42:45 +000017
18 public MatchIdentifier () {};
Nils Diewaldbb33da22015-03-04 16:24:25 +000019
20
Nils Diewaldff0f8742015-02-26 20:42:45 +000021 public MatchIdentifier (String id) {
Akron48937e92015-06-26 01:49:02 +020022
23 // Replace for legacy reasons with incompatible versions of Kustvakt
24 id = id.replaceAll("^(contains-|match-)([^-!_\\.]+?)!\\2_", "$1$2_");
25
Nils Diewaldff0f8742015-02-26 20:42:45 +000026 Matcher matcher = idRegex.matcher(id);
27 if (matcher.matches()) {
Akron8f6f7a32015-06-25 01:03:15 +020028
Akron48937e92015-06-26 01:49:02 +020029 // <legacy>
30 // and test compatibility
31 if (id.contains("!") || !id.contains("_")) {
32 this.setCorpusID(matcher.group(1));
33 this.setDocID(matcher.group(2));
34 }
35 // </legacy>
36 else {
37 // this.getCorpusID() + "." + this.getDocID()
38 this.setTextSigle(matcher.group(1) + '.' + matcher.group(2));
39 };
Akron8f6f7a32015-06-25 01:03:15 +020040
Nils Diewaldff0f8742015-02-26 20:42:45 +000041 this.setStartPos(Integer.parseInt(matcher.group(3)));
42 this.setEndPos(Integer.parseInt(matcher.group(4)));
43
44 if (matcher.group(5) != null) {
45 matcher = posRegex.matcher(matcher.group(5));
46 while (matcher.find()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +000047 this.addPos(Integer.parseInt(matcher.group(2)),
48 Integer.parseInt(matcher.group(3)),
49 Integer.parseInt(matcher.group(1)));
Nils Diewaldff0f8742015-02-26 20:42:45 +000050 };
51 };
52 };
53 };
54
Nils Diewaldbb33da22015-03-04 16:24:25 +000055
Nils Diewaldff0f8742015-02-26 20:42:45 +000056 public int getStartPos () {
57 return this.startPos;
58 };
59
Nils Diewaldbb33da22015-03-04 16:24:25 +000060
Nils Diewaldff0f8742015-02-26 20:42:45 +000061 public void setStartPos (int pos) {
62 if (pos >= 0)
63 this.startPos = pos;
64 };
65
Nils Diewaldbb33da22015-03-04 16:24:25 +000066
Nils Diewaldff0f8742015-02-26 20:42:45 +000067 public int getEndPos () {
68 return this.endPos;
69 };
70
Nils Diewaldbb33da22015-03-04 16:24:25 +000071
Nils Diewaldff0f8742015-02-26 20:42:45 +000072 public void setEndPos (int pos) {
73 if (pos >= 0)
74 this.endPos = pos;
75 };
76
Nils Diewaldbb33da22015-03-04 16:24:25 +000077
78 public void addPos (int start, int end, int number) {
Nils Diewaldff0f8742015-02-26 20:42:45 +000079 if (start >= 0 && end >= 0 && number >= 0)
Nils Diewaldbb33da22015-03-04 16:24:25 +000080 this.pos.add(new int[] { start, end, number });
Nils Diewaldff0f8742015-02-26 20:42:45 +000081 };
82
Nils Diewaldbb33da22015-03-04 16:24:25 +000083
Nils Diewaldff0f8742015-02-26 20:42:45 +000084 public ArrayList<int[]> getPos () {
85 return this.pos;
86 };
87
Nils Diewaldbb33da22015-03-04 16:24:25 +000088
Nils Diewaldff0f8742015-02-26 20:42:45 +000089 public String toString () {
Nils Diewaldff0f8742015-02-26 20:42:45 +000090 StringBuilder sb = new StringBuilder("match-");
91
Akron8f6f7a32015-06-25 01:03:15 +020092 if (this.docID == null) {
93 if (this.textSigle == null)
94 return null;
95
96 sb.append(this.textSigle);
97 }
98
Nils Diewaldff0f8742015-02-26 20:42:45 +000099 // Get prefix string corpus/doc
Akron8f6f7a32015-06-25 01:03:15 +0200100 // LEGACY
101 else if (this.corpusID != null) {
102 sb.append(this.corpusID).append('!').append(this.docID);
103 }
104 else {
105 sb.append(this.docID);
106 };
Nils Diewaldff0f8742015-02-26 20:42:45 +0000107
Akron640458c2015-06-25 12:36:15 +0200108 sb.append('-').append(this.getPositionString());
Nils Diewaldff0f8742015-02-26 20:42:45 +0000109 return sb.toString();
110 };
111
112
113 public String getPositionString () {
114 StringBuilder sb = new StringBuilder();
115 sb.append('p').append(this.startPos).append('-').append(this.endPos);
116
117 // Get Position information
118 for (int[] i : this.pos) {
119 sb.append('(').append(i[2]).append(')');
120 sb.append(i[0]).append('-').append(i[1]);
121 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000122
Nils Diewaldff0f8742015-02-26 20:42:45 +0000123 return sb.toString();
124 };
125};