Support signed match identifiers
Change-Id: Ib0fe20bcad0b3f6984bad0db1da326600956685a
diff --git a/Changes b/Changes
index d0bac00..d5fc72b 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.64.0 2025-03-28
+    - [feature] Support signed matchids to prevent consecutive
+      positional requests (diewald)
+
 0.63.3 2024-12-15
     - [performance] Improve short circuit on count=0 and
       cutoff=true (diewald)
diff --git a/pom.xml b/pom.xml
index be572a1..8709c17 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
 
   <groupId>de.ids-mannheim.korap.krill</groupId>
   <artifactId>Krill</artifactId>
-  <version>0.63.3</version>
+  <version>0.64.0</version>
   <packaging>jar</packaging>
 
   <name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
index 80737cd..e94f13a 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
@@ -2,21 +2,57 @@
 
 import java.util.*;
 import java.util.regex.*;
+import java.util.Base64;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import javax.crypto.Mac;
+import javax.crypto.spec.SecretKeySpec;
+import java.security.MessageDigest;
+import java.nio.charset.StandardCharsets;
+
+import de.ids_mannheim.korap.util.KrillProperties;
 
 public class MatchIdentifier extends DocIdentifier {
     private int startPos, endPos = -1;
 
+    // Logger
+    private final static Logger log = LoggerFactory.getLogger(MatchIdentifier.class);
+    
     private ArrayList<int[]> pos = new ArrayList<>(8);
 
+    String idRegexPos = "(p([0-9]+)-([0-9]+)"
+        + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)"
+        + "(?:c.+?)?)";
+    
     // Remember: "contains" is necessary for a compatibility bug in Kustvakt
 	// Identifier pattern is "match-
     Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
 									  + "(?:([^!]+?)[!\\.])?"
-									  + "([^!]+)[-/]p([0-9]+)-([0-9]+)"
-									  + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)"
-									  + "(?:c.+?)?$");
+									  + "([^!]+)[-/]"
+                                      + idRegexPos
+                                      + "(?:x_([a-zA-Z0-9-_]+?))?"
+                                      + "$");
+        
     Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
+    
+    private static volatile Mac mac = null;
 
+        // Instance initializer: while signing is disabled (mac == null), this
+        // runs for every new object - including the loading of the properties
+        {
+            if (mac == null) {
+                // Called for its side effect (presumably sets KrillProperties.secret - TODO confirm)
+                KrillProperties.loadDefaultProperties();
+
+                // The secret only needs to be fixed if the matchIDs are treated as
+                // persistent identifiers, otherwise it only needs to be stable temporarily
+                initMac(KrillProperties.secret);
+            };
+        };
 
     public MatchIdentifier () {};
 
@@ -29,13 +65,14 @@
      * compatibility.
      */
     public MatchIdentifier (String id) {
-
+        
         // Replace for legacy reasons with incompatible versions of Kustvakt
         id = id.replaceAll("^(contains-|match-)([^!_\\.]+?)!\\2_", "$1$2_");
 
+        
         Matcher matcher = idRegex.matcher(id);
         if (matcher.matches()) {
-
+            
             // textSigle is provided directly
             if (matcher.group(1) == null && id.contains("/")) {
                 // Todo: potentially use UID!
@@ -56,17 +93,44 @@
             };
             // </legacy>
 
-            this.setStartPos(Integer.parseInt(matcher.group(3)));
-            this.setEndPos(Integer.parseInt(matcher.group(4)));
+            // If signing is enabled, only accept ids with a valid signature
+            if (mac != null) {
+                String posString = matcher.group(3);
 
-            if (matcher.group(5) != null) {
-                matcher = posRegex.matcher(matcher.group(5));
-                while (matcher.find()) {
-                    this.addPos(Integer.parseInt(matcher.group(2)),
-                            Integer.parseInt(matcher.group(3)),
-                            Integer.parseInt(matcher.group(1)));
+                String message = this.getTextSigle() + "::" + posString;
+                String hmacStr = matcher.group(7);
+
+                // Stays null if the signature is missing or malformed
+                byte[] hmacBytes = null;
+                if (hmacStr != null) {
+                    try {
+                        hmacBytes = Base64.getUrlDecoder().decode(hmacStr);
+                    } catch (IllegalArgumentException ignored) {
+                        // decode() rejects malformed Base64 - treated as invalid
+                    };
+                };
+
+                // Generate the expected HMAC hash and compare
+                byte[] hmacVerify = mac.doFinal(message.getBytes(StandardCharsets.UTF_8));
+                if (hmacBytes == null
+                        || !MessageDigest.isEqual(hmacBytes, hmacVerify)) {
+                    this.textSigle = "";
+                    return;
                 };
             };
+
+            this.setStartPos(Integer.parseInt(matcher.group(4)));
+            this.setEndPos(Integer.parseInt(matcher.group(5)));
+
+            if (matcher.group(6) != null) {
+
+                matcher = posRegex.matcher(matcher.group(6));
+                while (matcher.find()) {
+                    this.addPos(Integer.parseInt(matcher.group(2)),
+                                Integer.parseInt(matcher.group(3)),
+                                Integer.parseInt(matcher.group(1)));
+                };
+            };            
         };
     };
 
@@ -123,13 +187,31 @@
             sb.append(this.docID);
         };
 
-        sb.append('-').append(this.getPositionString());
+        sb.append('-');
+
+        sb.append(this.getPositionString());
+
+        // Add signature
+        if (mac != null) {
+            String message = this.getTextSigle() + "::" + this.getPositionString();
+
+            // Generate the HMAC hash
+            byte[] hmac = mac.doFinal(message.getBytes(StandardCharsets.UTF_8));
+
+            String hmacStr = Base64.getUrlEncoder()
+                .withoutPadding()
+                .encodeToString(hmac);
+                
+            // Signature marker
+            sb.append("x_").append(hmacStr);
+        };
+        
         return sb.toString();
     };
 
 
     public String getPositionString () {
-        StringBuilder sb = new StringBuilder();
+        StringBuilder sb = new StringBuilder();        
         sb.append('p').append(this.startPos).append('-').append(this.endPos);
 
         // Get Position information
@@ -140,4 +222,41 @@
 
         return sb.toString();
     };
+
+    /**
+     * Enable or disable the signing of match identifiers.
+     *
+     * The given secret is used (UTF-8 encoded) as the key of an
+     * HmacSHA256 instance shared by all match identifiers.
+     * If the instance can't be created, the error is logged and
+     * the former signing state is kept.
+     *
+     * NOTE(review): KrillProperties reads the secret from
+     * "krill.secretB64", but it is not Base64 decoded here - confirm.
+     * NOTE(review): The shared Mac is used without synchronization
+     * when ids are parsed and serialized - confirm this is safe.
+     *
+     * @param secretKey
+     *            The secret; null or an empty string disables signing.
+     */
+    public static void initMac(String secretKey) {
+
+        // Compare by content: "!=" on strings only compares references
+        if (secretKey == null || secretKey.isEmpty()) {
+            mac = null;
+            return;
+        };
+
+        try {
+            Mac newMac = Mac.getInstance("HmacSHA256");
+            newMac.init(new SecretKeySpec(
+                secretKey.getBytes(StandardCharsets.UTF_8), "HmacSHA256"));
+
+            // Publish only a fully initialized instance
+            mac = newMac;
+        } catch (Exception e) {
+            // The throwable is passed last, without a placeholder, so the stack trace is logged
+            log.error("Can't initialize match id signing", e);
+        };
+    };
 };
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
index 6ed4dbc..68c3e7c 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillProperties.java
@@ -28,6 +28,9 @@
     
     public static String namedVCPath = "";
     public static boolean isTest = false;
+
+    public static String secret = "";
+
     
     // Logger
     private final static Logger log = LoggerFactory
@@ -118,6 +121,8 @@
         String matchExpansion = prop.getProperty(
                 "krill.match." + "expansion.includeContextSize", "false");
         matchExpansionIncludeContextSize = Boolean.parseBoolean(matchExpansion);
+
+        secret = prop.getProperty("krill.secretB64", "");
     }
     
 
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index a0ac953..57d8177 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -2,6 +2,7 @@
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
@@ -107,7 +108,6 @@
         assertEquals("corpus-1/doc-1/text-1", id.getTextSigle());
     };
 
-
     @Test
     public void posIdentifierExample1 () throws IOException {
         PosIdentifier id = new PosIdentifier();
@@ -120,6 +120,60 @@
         assertEquals(id.toString(), "token-c1!d1-p8");
     };
 
+    // Signed ids round-trip; ids with tampered content or signature are rejected
+    @Test
+    public void posIdentifierExampleSign () throws IOException {
+        MatchIdentifier.initMac("tree");
+
+        // Disable signing again in any case, so a failing assertion
+        // can't leak the signing state into other tests
+        try {
+            // A new identifier is serialized with its signature
+            MatchIdentifier id = new MatchIdentifier();
+            id.setTextSigle("aaa/bbb/ccc");
+            id.setStartPos(8);
+            id.setEndPos(10);
+            assertEquals("match-aaa/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8", id.toString());
+
+            // A correctly signed identifier is parsed completely
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+            assertNotNull(id);
+            assertEquals("aaa/bbb/ccc", id.getTextSigle());
+            assertEquals(8, id.getStartPos());
+            assertEquals(10, id.getEndPos());
+
+            // Fail - match wrong: p9 instead of p8
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p9-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+
+            // Fail - signature wrong: 4Ou6 instead of 4OI6
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4Ou6xqcp-PkUrjQ9080Kr8");
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+
+            // Fail - signature wrong: vJ instead of VJ
+            id = new MatchIdentifier("match-aaa/bbb/ccc-p8-10x_ibY-h1k-vJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+
+            // Fail - match wrong: aab instead of aaa
+            id = new MatchIdentifier("match-aab/bbb/ccc-p8-10x_ibY-h1k-VJ4aZjBFgTu8N4OI6xqcp-PkUrjQ9080Kr8");
+            assertNotNull(id);
+            assertEquals("", id.getTextSigle());
+            assertEquals(0, id.getStartPos());
+            assertEquals(-1, id.getEndPos());
+        } finally {
+            MatchIdentifier.initMac("");
+        };
+    };
+    
 	@Test
     public void posIdentifierExample2 () throws IOException {
         PosIdentifier id = new PosIdentifier();
@@ -159,6 +213,35 @@
         assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8", km.getID());
     };
 
+    @Test
+    public void indexExample1Sign () throws IOException {
+        MatchIdentifier.initMac("tree");
+        // Disable signing again even if an assertion fails
+        try {
+            KrillIndex ki = new KrillIndex();
+            ki.addDoc(createSimpleFieldDoc());
+            ki.commit();
+
+            QueryBuilder kq = new QueryBuilder("tokens");
+            Krill ks = new Krill(
+                    kq.nr(2, kq.seq(kq.seg("s:b")).append(kq.nr(kq.seg("s:a")))));
+            Result kr = ki.search(ks);
+
+            assertEquals("totalResults", 1, kr.getTotalResults());
+            assertEquals("StartPos (0)", 7, kr.getMatch(0).startPos);
+            assertEquals("EndPos (0)", 9, kr.getMatch(0).endPos);
+            Match km = kr.getMatch(0);
+            assertEquals("SnippetBrackets (0)", "... bcabca[[{2:b{1:a}}]]c",
+                    km.getSnippetBrackets());
+            assertEquals("SnippetTokens (0)", "{\"left\":[\"b\",\"c\",\"a\",\"b\",\"c\",\"a\"],\"match\":[\"b\",\"a\"],\"right\":[\"c\"],\"classes\":[[2,0,1],[1,1,1]]}",
+                         km.getSnippetTokens().toString());
+            assertEquals("ID (0)", "match-c1!d1-p7-9(2)7-8(1)8-8x_07WRwmjA5EigwG8wYcURhnz_WkL9cepvU96hC2mp6SE", km.getID());
+        } finally {
+            MatchIdentifier.initMac("");
+        };
+    };
+
+    
 
     @Test
     public void indexExample2 () throws IOException, QueryException {