Keep highlights that extend beyond a cut match (fixes #177)

Change-Id: I34213933ae40013ccfa8fd5ac21e53cb1613b5b3
diff --git a/Changes b/Changes
index 2a82202..d78dca0 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.64.7 2026-04-28
+    - [bugfix] Keep highlights that extend beyond a cut match
+      (fixes #177; diewald; AI-assisted Claude Opus 4.6)
+
 0.64.6 2026-03-09
     - [performance] Add leaf cache. (diewald)
     - [bugfix] Fix fingerprinter (wasn't threadsafe; diewald)
diff --git a/pom.xml b/pom.xml
index 2f7731d..021d326 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
 
   <groupId>de.ids-mannheim.korap.krill</groupId>
   <artifactId>Krill</artifactId>
-  <version>0.64.6</version>
+  <version>0.64.7</version>
   <packaging>jar</packaging>
 
   <name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 2700c16..4374783 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -351,6 +351,20 @@
 
                         this.addHighlight(start, end - 1, number);
                     }
+                    // Clamp highlights that start inside a cut match but end beyond it
+                    else if (this.endCutted
+                            && unsignedByte(number) <= 128
+                            && start >= this.getStartPos()
+                            && start < this.getEndPos()
+                            && end > this.getEndPos()) {
+
+                        if (DEBUG) {
+                            log.trace("Add clamped highlight with class {}!",
+                                    unsignedByte(number));
+                        };
+
+                        this.addHighlight(start, this.getEndPos() - 1, number);
+                    }
                     else if (DEBUG) {
                         log.trace("Don't add highlight of class {}!",
                                 unsignedByte(number));
@@ -2314,8 +2328,16 @@
 					log.debug("Pagebreak keeps end position");
 				};
 
-                if (start < 0 ||
-                    ((end < 0 | start > endRelOffsetChar) && end != PB_MARKER && end != ALL_MARKER)) {
+                if (start < 0) {
+
+                    // Clamp start to 0 if end is non-negative and neither PB_MARKER nor ALL_MARKER
+                    if (end >= 0 && end != PB_MARKER && end != ALL_MARKER) {
+                        start = 0;
+                    } else {
+                        continue;
+                    }
+                }
+                else if ((end < 0 | start > endRelOffsetChar) && end != PB_MARKER && end != ALL_MARKER) {
                     continue;
                 };
 
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
index 11acbde..7e594cf 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -543,6 +543,106 @@
 
 	};
 
+    @Test
+    public void highlightCutMatchSerializationBug ()
+            throws IOException, QueryException {
+
+        // Test for issue #177: cutting a match removed highlights extending beyond the cut
+        KrillIndex ki = new KrillIndex();
+        FieldDocument fd = new FieldDocument();
+        fd.addString("ID", "doc-1");
+        fd.addString("UID", "1");
+        fd.addTV("base",
+                "abcdefghij",
+                "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
+                        + "[(1-2)s:b|i:b|_1#1-2]"
+                        + "[(2-3)s:c|i:c|_2#2-3]"
+                        + "[(3-4)s:d|i:d|_3#3-4]"
+                        + "[(4-5)s:e|i:e|_4#4-5]"
+                        + "[(5-6)s:f|i:f|_5#5-6]"
+                        + "[(6-7)s:g|i:g|_6#6-7]"
+                        + "[(7-8)s:h|i:h|_7#7-8]"
+                        + "[(8-9)s:i|i:i|_8#8-9]"
+                        + "[(9-10)s:j|i:j|_9#9-10]");
+        ki.addDoc(fd);
+        ki.commit();
+
+        QueryBuilder kq = new QueryBuilder("base");
+
+        // {1: seq(c, d, e, f, g)} - class wrapping 5 tokens
+        SpanQuery q = (SpanQuery) kq.nr(1,
+                kq.seq(kq.seg("s:c")).append(kq.seg("s:d"))
+                        .append(kq.seg("s:e")).append(kq.seg("s:f"))
+                        .append(kq.seg("s:g")))
+                .toQuery();
+
+        assertEquals("{1: spanNext(spanNext(spanNext(spanNext(base:s:c, base:s:d), base:s:e), base:s:f), base:s:g)}",
+                q.toString());
+
+        Krill ks = new Krill(q);
+        ks.setMaxTokenMatchSize(3);
+
+        Result kr = ks.apply(ki);
+        assertEquals(1, kr.getTotalResults());
+
+        Match km = kr.getMatch(0);
+        assertTrue(km.endCutted);
+
+        assertEquals("ab[[{1:cde}]<!>]fghij",
+                km.getSnippetBrackets());
+        assertEquals(
+                "<span class=\"context-left\">ab</span>"
+                        + "<span class=\"match\"><mark>"
+                        + "<mark class=\"class-1 level-0\">cde</mark>"
+                        + "</mark><span class=\"cutted\"></span></span>"
+                        + "<span class=\"context-right\">fghij</span>",
+                km.getSnippetHTML());
+    };
+
+
+    @Test
+    public void highlightCutMatchIndexBug ()
+            throws IOException, QueryException {
+
+        // Test for issue #177: cutting a match removed highlights extending beyond the cut
+        // Search in a real index with highlight on cut snippet
+        KrillIndex ki = new KrillIndex();
+        for (String i : new String[] { "00001", "00002" }) {
+            ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
+                    true);
+        };
+        ki.commit();
+
+        QueryBuilder kq = new QueryBuilder("tokens");
+
+        SpanQuery q = (SpanQuery) kq.nr(1,
+                kq.seq(kq.seg("s:Mit")).append(kq.seg("s:Ausnahme"))
+                        .append(kq.seg("s:von")).append(kq.seg("s:Fremdwörtern")))
+                .toQuery();
+
+        Krill ks = new Krill(q);
+        ks.setMaxTokenMatchSize(2);
+        Result kr = ks.apply(ki);
+        assertTrue("Should find at least one match", kr.getTotalResults() > 0);
+
+        Match km = kr.getMatch(0);
+        assertTrue(km.endCutted);
+
+        String brackets = km.getSnippetBrackets();
+        // The class-1 highlight must appear in the cut match
+        assertTrue("Brackets should contain class 1 highlight: " + brackets,
+                brackets.contains("{1:"));
+        assertTrue("Brackets should contain cut marker: " + brackets,
+                brackets.contains("<!>"));
+
+        String html = km.getSnippetHTML();
+        assertTrue("HTML should contain class-1 highlight: " + html,
+                html.contains("class-1"));
+        assertTrue("HTML should contain cutted marker: " + html,
+                html.contains("cutted"));
+    };
+
+
         @Test
         public void checkTokenArray () throws IOException, QueryException {