Support signed match identifiers
Change-Id: Ib0fe20bcad0b3f6984bad0db1da326600956685a
diff --git a/Changes b/Changes
index d0bac00..d5fc72b 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.64.0 2025-03-28
+ - [feature] Support signed matchids to prevent consecutive
+ positional requests (diewald)
+
0.63.3 2024-12-15
- [performance] Improve short circuit on count=0 and
cutoff=true (diewald)
diff --git a/pom.xml b/pom.xml
index be572a1..8709c17 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids-mannheim.korap.krill</groupId>
<artifactId>Krill</artifactId>
- <version>0.63.3</version>
+ <version>0.64.0</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
index 80737cd..e94f13a 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
@@ -2,21 +2,57 @@
import java.util.*;
import java.util.regex.*;
+import java.util.Base64;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import javax.crypto.Mac;
+import javax.crypto.spec.SecretKeySpec;
+import java.security.MessageDigest;
+import java.nio.charset.StandardCharsets;
+
+import de.ids_mannheim.korap.util.KrillProperties;
public class MatchIdentifier extends DocIdentifier {
private int startPos, endPos = -1;
+ // Logger
+ private final static Logger log = LoggerFactory.getLogger(MatchIdentifier.class);
+
private ArrayList<int[]> pos = new ArrayList<>(8);
+ String idRegexPos = "(p([0-9]+)-([0-9]+)"
+ + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)"
+ + "(?:c.+?)?)";
+
// Remember: "contains" is necessary for a compatibility bug in Kustvakt
// Identifier pattern is "match-
Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
+ "(?:([^!]+?)[!\\.])?"
- + "([^!]+)[-/]p([0-9]+)-([0-9]+)"
- + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)"
- + "(?:c.+?)?$");
+ + "([^!]+)[-/]"
+ + idRegexPos
+ + "(?:x_([a-zA-Z0-9-_]+?))?"
+ + "$");
+
Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
+
+ private static volatile Mac mac = null;
+ {
+ if (mac == null) {
+ // NOTE(review): instance initializer - runs on each construction while mac is null
+ Properties prop = KrillProperties.loadDefaultProperties();
+ // presumably loadDefaultProperties() fills KrillProperties.secret - TODO confirm
+ // The secret only needs to be fixed if the matchIDs are treated as
+ // persistent identifiers; otherwise it only needs to be stable temporarily
+ String secretKey = KrillProperties.secret;
+
+ initMac(secretKey);
+ };
+ };
public MatchIdentifier () {};
@@ -29,13 +65,14 @@
* compatibility.
*/
public MatchIdentifier (String id) {
-
+
// Replace for legacy reasons with incompatible versions of Kustvakt
id = id.replaceAll("^(contains-|match-)([^!_\\.]+?)!\\2_", "$1$2_");
+
Matcher matcher = idRegex.matcher(id);
if (matcher.matches()) {
-
+
// textSigle is provided directly
if (matcher.group(1) == null && id.contains("/")) {
// Todo: potentially use UID!
@@ -56,17 +93,44 @@
};
// </legacy>
- this.setStartPos(Integer.parseInt(matcher.group(3)));
- this.setEndPos(Integer.parseInt(matcher.group(4)));
+            if (mac != null) {
+                // Signing is enabled: the HMAC covers "<textSigle>::<position string>"
+                String posString = matcher.group(3);
- if (matcher.group(5) != null) {
- matcher = posRegex.matcher(matcher.group(5));
- while (matcher.find()) {
- this.addPos(Integer.parseInt(matcher.group(2)),
- Integer.parseInt(matcher.group(3)),
- Integer.parseInt(matcher.group(1)));
+                String message = this.getTextSigle() + "::" + posString;
+                String hmacStr = matcher.group(7);
+
+                // A missing, malformed or mismatching signature all invalidate the id
+                boolean verified = false;
+                if (hmacStr != null) {
+                    try {
+                        // decode() throws IllegalArgumentException on ill-formed Base64
+                        byte[] hmacBytes = Base64.getUrlDecoder().decode(hmacStr);
+                        byte[] hmacVerify = mac.doFinal(message.getBytes(StandardCharsets.UTF_8));
+                        verified = MessageDigest.isEqual(hmacBytes, hmacVerify);
+                    } catch (IllegalArgumentException e) {
+                        verified = false;
+                    };
+                };
+
+                if (!verified) {
+                    this.textSigle = "";
+                    return;
};
+            };
+
+ this.setStartPos(Integer.parseInt(matcher.group(4)));
+ this.setEndPos(Integer.parseInt(matcher.group(5)));
+
+ if (matcher.group(6) != null) {
+
+ matcher = posRegex.matcher(matcher.group(6));
+ while (matcher.find()) {
+ this.addPos(Integer.parseInt(matcher.group(2)),
+ Integer.parseInt(matcher.group(3)),
+ Integer.parseInt(matcher.group(1)));
+ };
+ };
};
};
@@ -123,13 +187,31 @@
sb.append(this.docID);
};
- sb.append('-').append(this.getPositionString());
+ sb.append('-');
+
+ sb.append(this.getPositionString());
+
+ // Add signature
+ if (mac != null) {
+ String message = this.getTextSigle() + "::" + this.getPositionString();
+
+ // Generate the HMAC hash
+ byte[] hmac = mac.doFinal(message.getBytes(StandardCharsets.UTF_8));
+
+ String hmacStr = Base64.getUrlEncoder()
+ .withoutPadding()
+ .encodeToString(hmac);
+
+ // Signature marker
+ sb.append("x_").append(hmacStr);
+ };
+
return sb.toString();
};
public String getPositionString () {
- StringBuilder sb = new StringBuilder();
+ StringBuilder sb = new StringBuilder();
sb.append('p').append(this.startPos).append('-').append(this.endPos);
// Get Position information
@@ -140,4 +222,18 @@
return sb.toString();
};
+
+    /** (Re)configures HMAC-SHA256 signing of match ids; a null or empty secret, or a failed setup, disables signing. */
+    public static void initMac(String secretKey) {
+        mac = null;
+        if (secretKey != null && !secretKey.isEmpty()) {
+            try {
+                Mac newMac = Mac.getInstance("HmacSHA256");
+                newMac.init(new SecretKeySpec(secretKey.getBytes(StandardCharsets.UTF_8), "HmacSHA256"));
+                mac = newMac; // publish only after init succeeded; an uninitialized Mac throws on doFinal
+            } catch (Exception e) {
+                log.error("Can't initialize match id signing", e); // trailing Throwable is logged with its stack trace
+            };
+        };
+    };
};
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
index 6ed4dbc..68c3e7c 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
@@ -28,6 +28,9 @@
public static String namedVCPath = "";
public static boolean isTest = false;
+
+ public static String secret = "";
+
// Logger
private final static Logger log = LoggerFactory
@@ -118,6 +121,8 @@
String matchExpansion = prop.getProperty(
"krill.match." + "expansion.includeContextSize", "false");
matchExpansionIncludeContextSize = Boolean.parseBoolean(matchExpansion);
+
+ secret = prop.getProperty("krill.secretB64", "");
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index a0ac953..57d8177 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -2,6 +2,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -107,7 +108,6 @@
assertEquals("corpus-1/doc-1/text-1", id.getTextSigle());
};
-
@Test
public void posIdentifierExample1 () throws IOException {
PosIdentifier id = new PosIdentifier();
@@ -120,6 +120,60 @@
assertEquals(id.toString(), "token-c1!d1-p8");
};
+    @Test
+    public void posIdentifierExampleSign () throws IOException {
+        // Signing is static state: enable it for this test and always reset it in finally
+        MatchIdentifier.initMac("tree");
+        try {
+            MatchIdentifier id = new MatchIdentifier();
+            id.setTextSigle("aaa/bbb/ccc");
+            id.setStartPos(8);
+            id.setEndPos(10);
+            assertEquals("match-aaa/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8", id.toString());
+
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+
+            assertNotNull(id);
+            assertEquals("aaa/bbb/ccc", id.getTextSigle());
+            assertEquals(8, id.getStartPos());
+            assertEquals(10, id.getEndPos());
+
+            // Fail - match wrong: p9 instead of p8
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p9-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+
+            // Fail - signature wrong: 4Ou6 instead of 4OI6
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4Ou6xqcp-PkUrjQ9080Kr8");
+
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+
+            // Fail - signature wrong: vJ instead of VJ
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p8-10x_ibY-h1k-vJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+
+            // Fail - match wrong: aab instead of aaa
+            id = new MatchIdentifier("match-aab/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+        } finally {
+            MatchIdentifier.initMac("");
+        };
+    };
+
@Test
public void posIdentifierExample2 () throws IOException {
PosIdentifier id = new PosIdentifier();
@@ -159,6 +213,35 @@
assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8", km.getID());
};
+    @Test
+    public void indexExample1Sign () throws IOException {
+        MatchIdentifier.initMac("tree");
+        try {
+            KrillIndex ki = new KrillIndex();
+            ki.addDoc(createSimpleFieldDoc());
+            ki.commit();
+
+            QueryBuilder kq = new QueryBuilder("tokens");
+            Krill ks = new Krill(
+                kq.nr(2, kq.seq(kq.seg("s:b")).append(kq.nr(kq.seg("s:a")))));
+            Result kr = ki.search(ks);
+
+            assertEquals("totalResults", 1, kr.getTotalResults());
+            assertEquals("StartPos (0)", 7, kr.getMatch(0).startPos);
+            assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);
+            // With signing enabled the match id is expected to end in an "x_" signature
+            Match km = kr.getMatch(0);
+            assertEquals("SnippetBrackets (0)", "... bcabca[[{2:b{1:a}}]]c",
+                km.getSnippetBrackets());
+            assertEquals("SnippetTokens (0)", "{\"left\":[\"b\",\"c\",\"a\",\"b\",\"c\",\"a\"],\"match\":[\"b\",\"a\"],\"right\":[\"c\"],\"classes\":[[2,0,1],[1,1,1]]}",
+                km.getSnippetTokens().toString());
+            assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8x_07WRwmjA5EigwG8wYcURhnz_WkL9cepvU96hC2mp6SE", km.getID());
+        } finally {
+            MatchIdentifier.initMac(""); // signing is static state: always disable it again for other tests
+        };
+    };
+
+
@Test
public void indexExample2 () throws IOException, QueryException {