Fix negation in segment queries following De Morgan's law (fixes #93)

Change-Id: I062bb44f572b23012578486082df21989105669f
diff --git a/Changes b/Changes
index d356bd8..734db02 100644
--- a/Changes
+++ b/Changes
@@ -24,6 +24,8 @@
       (diewald; tests AI-assisted Claude Opus 4.6)
     - [bugfix] Keep classes in repetition queries
       (diewald; fixes #59; diewald; AI-assisted Claude Opus 4.6)
+    - [bugfix] Fix negation in segment queries following De Morgan's law
+      (diewald; fixes #93; diewald; AI-assisted Claude Opus 4.6)
 
 0.64.6 2026-03-09
     - [performance] Add leaf cache. (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index ae21a82..c55d9df 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -1172,10 +1172,34 @@
 
                     case "relation:or":
 
+                        // Collect all OR operands, e.g. [pos=NN | pos!=VV], and check if all are negated
+                        ArrayList<SpanQueryWrapper> orParts = new ArrayList<>();
+                        boolean allNeg = true;
+                        for (JsonNode operand : operands) {
+                            SpanQueryWrapper part = this._segFromJson(operand);
+                            orParts.add(part);
+                            if (!part.isNegative())
+                                allNeg = false;
+                        };
+
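+                        // De Morgan: e.g. [!pos=NN | !pos=VV] -> NOT([pos=NN & pos=VV]). NOTE(review): the cast in the loop below is unchecked - confirm every negative operand really is a SpanAlterQueryWrapper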
+                        if (allNeg && orParts.size() > 0) {
+                            SpanSegmentQueryWrapper ssegOr = this.builder().seg();
+                            for (SpanQueryWrapper part : orParts) {
+                                ssegOr.without((SpanAlterQueryWrapper) part);
+                            };
+                            SpanAlterQueryWrapper negWrapper =
+                                new SpanAlterQueryWrapper(this.field);
+                            negWrapper.or(ssegOr);
+                            negWrapper.setNegative(true);
+                            return negWrapper;
+                        }
+
+                        // Normal case, e.g. [pos=NN | pos=VV] - build a standard OR
                         SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(
                                 this.field);
-                        for (JsonNode operand : operands) {
-                            ssaq.or(this._segFromJson(operand));
+                        for (SpanQueryWrapper part : orParts) {
+                            ssaq.or(part);
                         };
                         return ssaq;
                 };
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
index cbc715b..b69b236 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
@@ -194,17 +194,19 @@
                 || (this.inclusive.size() + this.exclusive.size() == 0)) {
             return (SpanQuery) null;
         }
+        // Both inclusive and exclusive, e.g. [orth=Baum & pos!=NN]
         else if (this.inclusive.size() >= 1 && this.exclusive.size() >= 1) {
             return (SpanQuery) new SpanNotQuery(
                     this._listToQuery(this.inclusive),
                     this._listToOrQuery(this.exclusive));
         }
 
-        // These are now identical but may be negative
+        // Exclusives only, e.g. [pos!=NN & pos!=VV] -- OR-combine for exclusion
         else if (this.inclusive.size() == 0 && this.exclusive.size() >= 1) {
-            return (SpanQuery) this._listToQuery(this.exclusive);
+            return (SpanQuery) this._listToOrQuery(this.exclusive);
         }
 
+        // Inclusives only, e.g. [orth=Baum & pos=NN] -- AND-combine into segment
         else if (this.inclusive.size() >= 1 && this.exclusive.size() == 0) {
             return (SpanQuery) this._listToQuery(this.inclusive);
         };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSegmentNegationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSegmentNegationIndex.java
index 1bd1dcd..338c62c 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSegmentNegationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSegmentNegationIndex.java
@@ -116,6 +116,191 @@
     };
 
 
+    @Test
+    public void testAllNegationsInTermGroup () throws Exception {
+        // AND of two negated terms before "Hauses": [orth!="des" & orth!="ihres"] [orth="Hauses"]
+        ki = new KrillIndex();
+
+        FieldDocument fd1 = new FieldDocument();
+        fd1.addString("ID", "doc-neg-0");
+        fd1.addTV("tokens", "des Hauses",
+                "[(0-3)s:des|i:des|_1$<i>0<i>1]"
+                + "[(4-10)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd1);
+
+        FieldDocument fd2 = new FieldDocument();
+        fd2.addString("ID", "doc-neg-1");
+        fd2.addTV("tokens", "ihres Hauses",
+                "[(0-5)s:ihres|i:ihres|_1$<i>0<i>1]"
+                + "[(6-12)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd2);
+
+        FieldDocument fd3 = new FieldDocument();
+        fd3.addString("ID", "doc-neg-2");
+        fd3.addTV("tokens", "eines Hauses",
+                "[(0-5)s:eines|i:eines|_1$<i>0<i>1]"
+                + "[(6-12)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd3);
+
+        FieldDocument fd4 = new FieldDocument();
+        fd4.addString("ID", "doc-neg-3");
+        fd4.addTV("tokens", "meines Hauses",
+                "[(0-6)s:meines|i:meines|_1$<i>0<i>1]"
+                + "[(7-13)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd4);
+
+        ki.commit();
+
+        // Same query as KoralQuery JSON; "des" and "ihres" are excluded in first position,
+        // so 2 of the 4 documents are expected: [orth!="des" & orth!="ihres"] [orth="Hauses"]
+        String json = "{\"query\": {\"@type\": \"koral:group\", \"operands\": ["
+                + "{\"@type\": \"koral:token\", \"wrap\": {"
+                + "\"@type\": \"koral:termGroup\", \"operands\": ["
+                + "{\"@type\": \"koral:term\", \"key\": \"des\", \"layer\": \"orth\", \"match\": \"match:ne\", \"type\": \"type:regex\"},"
+                + "{\"@type\": \"koral:term\", \"key\": \"ihres\", \"layer\": \"orth\", \"match\": \"match:ne\", \"type\": \"type:regex\"}"
+                + "], \"relation\": \"relation:and\"}},"
+                + "{\"@type\": \"koral:token\", \"wrap\": {"
+                + "\"@type\": \"koral:term\", \"key\": \"Hauses\", \"layer\": \"orth\", \"match\": \"match:eq\", \"type\": \"type:regex\"}}"
+                + "], \"operation\": \"operation:sequence\"}}";
+
+        Krill krill = new Krill(json);
+        kr = ki.search(krill);
+
+        assertEquals("totalResults", 2, kr.getTotalResults());
+        assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
+        assertEquals("EndPos (0)", 2, kr.getMatch(0).endPos);
+        assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
+        assertEquals("EndPos (1)", 2, kr.getMatch(1).endPos);
+    }
+
+
+    @Test
+    public void testAllNegationsOrInTermGroup () throws Exception {
+        // OR of two negated terms before "Hauses": [orth!="des" | orth!="ihres"] [orth="Hauses"]
+        // By De Morgan: NOT(des) OR NOT(ihres) = NOT(des AND ihres)
+        // Each token in this fixture carries exactly one orth (s:) value,
+        // so (des AND ihres) is never satisfied and NOT(...) holds everywhere.
+        // Every first token matches, so all four "[...] Hauses" docs are expected.
+        ki = new KrillIndex();
+
+        FieldDocument fd1 = new FieldDocument();
+        fd1.addString("ID", "doc-neg-0");
+        fd1.addTV("tokens", "des Hauses",
+                "[(0-3)s:des|i:des|_1$<i>0<i>1]"
+                + "[(4-10)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd1);
+
+        FieldDocument fd2 = new FieldDocument();
+        fd2.addString("ID", "doc-neg-1");
+        fd2.addTV("tokens", "ihres Hauses",
+                "[(0-5)s:ihres|i:ihres|_1$<i>0<i>1]"
+                + "[(6-12)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd2);
+
+        FieldDocument fd3 = new FieldDocument();
+        fd3.addString("ID", "doc-neg-2");
+        fd3.addTV("tokens", "eines Hauses",
+                "[(0-5)s:eines|i:eines|_1$<i>0<i>1]"
+                + "[(6-12)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd3);
+
+        FieldDocument fd4 = new FieldDocument();
+        fd4.addString("ID", "doc-neg-3");
+        fd4.addTV("tokens", "meines Hauses",
+                "[(0-6)s:meines|i:meines|_1$<i>0<i>1]"
+                + "[(7-13)s:Hauses|i:hauses|_2$<i>1<i>2]");
+        ki.addDoc(fd4);
+
+        ki.commit();
+
+        // KoralQuery JSON for [orth!="des" | orth!="ihres"] [orth="Hauses"]
+        String json = "{\"query\": {\"@type\": \"koral:group\", \"operands\": ["
+                + "{\"@type\": \"koral:token\", \"wrap\": {"
+                + "\"@type\": \"koral:termGroup\", \"operands\": ["
+                + "{\"@type\": \"koral:term\", \"key\": \"des\", \"layer\": \"orth\", \"match\": \"match:ne\", \"type\": \"type:regex\"},"
+                + "{\"@type\": \"koral:term\", \"key\": \"ihres\", \"layer\": \"orth\", \"match\": \"match:ne\", \"type\": \"type:regex\"}"
+                + "], \"relation\": \"relation:or\"}},"
+                + "{\"@type\": \"koral:token\", \"wrap\": {"
+                + "\"@type\": \"koral:term\", \"key\": \"Hauses\", \"layer\": \"orth\", \"match\": \"match:eq\", \"type\": \"type:regex\"}}"
+                + "], \"operation\": \"operation:sequence\"}}";
+
+        Krill krill = new Krill(json);
+        kr = ki.search(krill);
+
+        assertEquals("totalResults", 4, kr.getTotalResults());
+    }
+
+
+    @Test
+    public void testAllNegationsOrMultiValuedLayer () throws Exception {
+        // OR of two negated POS terms before "Baum": [marmot/p!=ADJ | marmot/p!=NN] [orth="Baum"]
+        // By De Morgan: NOT(ADJ) OR NOT(NN) = NOT(ADJ AND NN)
+        // A position CAN have multiple POS tags (e.g. ADJ and NN).
+        // Only tokens with BOTH ADJ and NN are excluded.
+        // NOTE: this is one interpretation - read per annotation, an ADJ tag is itself !=NN (and vice versa).
+        ki = new KrillIndex();
+
+        // Token "alte" has BOTH marmot/p:ADJ and marmot/p:NN
+        FieldDocument fd1 = new FieldDocument();
+        fd1.addString("ID", "doc-multi-0");
+        fd1.addTV("tokens", "alte Baum",
+                "[(0-4)s:alte|i:alte|marmot/p:ADJ|marmot/p:NN|_1$<i>0<i>1]"
+                + "[(5-9)s:Baum|i:baum|_2$<i>1<i>2]");
+        ki.addDoc(fd1);
+
+        // Token "grosse" has only marmot/p:ADJ (not NN)
+        FieldDocument fd2 = new FieldDocument();
+        fd2.addString("ID", "doc-multi-1");
+        fd2.addTV("tokens", "grosse Baum",
+                "[(0-6)s:grosse|i:grosse|marmot/p:ADJ|_1$<i>0<i>1]"
+                + "[(7-11)s:Baum|i:baum|_2$<i>1<i>2]");
+        ki.addDoc(fd2);
+
+        // Token "kleiner" has only marmot/p:NN (not ADJ)
+        FieldDocument fd3 = new FieldDocument();
+        fd3.addString("ID", "doc-multi-2");
+        fd3.addTV("tokens", "kleiner Baum",
+                "[(0-7)s:kleiner|i:kleiner|marmot/p:NN|_1$<i>0<i>1]"
+                + "[(8-12)s:Baum|i:baum|_2$<i>1<i>2]");
+        ki.addDoc(fd3);
+
+        // Token "der" has marmot/p:DET (neither ADJ nor NN)
+        FieldDocument fd4 = new FieldDocument();
+        fd4.addString("ID", "doc-multi-3");
+        fd4.addTV("tokens", "der Baum",
+                "[(0-3)s:der|i:der|marmot/p:DET|_1$<i>0<i>1]"
+                + "[(4-8)s:Baum|i:baum|_2$<i>1<i>2]");
+        ki.addDoc(fd4);
+
+        ki.commit();
+
+        // KoralQuery JSON for [marmot/p!=ADJ | marmot/p!=NN] [orth="Baum"]
+        // De Morgan: NOT(ADJ AND NN) - only exclude tokens with BOTH
+        String json = "{\"query\": {\"@type\": \"koral:group\", \"operands\": ["
+                + "{\"@type\": \"koral:token\", \"wrap\": {"
+                + "\"@type\": \"koral:termGroup\", \"operands\": ["
+                + "{\"@type\": \"koral:term\", \"foundry\": \"marmot\", \"key\": \"ADJ\", \"layer\": \"pos\", \"match\": \"match:ne\", \"type\": \"type:regex\"},"
+                + "{\"@type\": \"koral:term\", \"foundry\": \"marmot\", \"key\": \"NN\", \"layer\": \"pos\", \"match\": \"match:ne\", \"type\": \"type:regex\"}"
+                + "], \"relation\": \"relation:or\"}},"
+                + "{\"@type\": \"koral:token\", \"wrap\": {"
+                + "\"@type\": \"koral:term\", \"key\": \"Baum\", \"layer\": \"orth\", \"match\": \"match:eq\", \"type\": \"type:regex\"}}"
+                + "], \"operation\": \"operation:sequence\"}}";
+
+        Krill krill = new Krill(json);
+        kr = ki.search(krill);
+
+        // doc-multi-0: "alte" has BOTH ADJ and NN -> ADJ AND NN = true
+        //   -> NOT(true) = false -> excluded
+        // doc-multi-1: "grosse" has only ADJ -> ADJ AND NN = false
+        //   -> NOT(false) = true -> matches
+        // doc-multi-2: "kleiner" has only NN -> ADJ AND NN = false
+        //   -> NOT(false) = true -> matches
+        // doc-multi-3: "der" has DET -> ADJ AND NN = false
+        //   -> NOT(false) = true -> matches
+        assertEquals("totalResults", 3, kr.getTotalResults());
+    }
+
+
     private FieldDocument createFieldDoc0 () {
         fd = new FieldDocument();
         fd.addString("ID", "doc-0");
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
index 5947790..fbd945e 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
@@ -169,13 +169,9 @@
                 getClass().getResource("/queries/bsp11.jsonld").getFile());
 
         // [base!=Katze | orth!=Katzen]
-        /*
-          Imagine a([^b]|[^c])d
-          Matches abd and acd
-          Interpretation would be not(spanAnd(...))
-        */
+        // De Morgan: NOT(Katze) OR NOT(Katzen) = NOT(Katze AND Katzen)
         assertEquals(
-            "spanOr([tokens:mate/l:Katze, tokens:s:Katzen])",
+            "spanSegment(tokens:mate/l:Katze, tokens:s:Katzen)",
             sqwi.toQuery().toString());
         assertTrue(sqwi.isNegative());
     };
@@ -747,6 +743,52 @@
             kq.fromKoral(json).toQuery().toString());
     };
 
+    @Test
+    public void queryJSONallNegationInGroup () throws QueryException {
+        // [orth!="des" & orth!="ihres"] -- De Morgan: NOT(A) AND NOT(B) = NOT(A OR B)
+        String json = getJsonString(getClass()
+                                    .getResource("/queries/segment/all-negation-in-group.jsonld")
+                                    .getFile());
+
+        SpanQueryWrapper sqwi = new KrillQuery("tokens").fromKoral(json);
+        assertEquals(
+            "spanOr([SpanMultiTermQueryWrapper(tokens:/s:des/), SpanMultiTermQueryWrapper(tokens:/s:ihres/)])",
+            sqwi.toQuery().toString());
+        assertTrue(sqwi.isNegative());
+    };
+
+    @Test
+    public void queryJSONallNegationInGroupThree () throws QueryException {
+        // Query [orth!="des" & orth!="ihres" & orth!="eines"]: by De Morgan's law,
+        // NOT(A) AND NOT(B) AND NOT(C) = NOT(A OR B OR C), i.e. a negative spanOr of the three terms
+        String json = getJsonString(getClass()
+                                    .getResource("/queries/segment/all-negation-in-group-three.jsonld")
+                                    .getFile());
+
+        KrillQuery kq = new KrillQuery("tokens");
+        SpanQueryWrapper sqwi = kq.fromKoral(json);
+        assertEquals(
+            "spanOr([SpanMultiTermQueryWrapper(tokens:/s:des/), SpanMultiTermQueryWrapper(tokens:/s:ihres/), SpanMultiTermQueryWrapper(tokens:/s:eines/)])",
+            sqwi.toQuery().toString());
+        assertTrue(sqwi.isNegative());
+    };
+
+    @Test
+    public void queryJSONallNegationInGroupOrThree () throws QueryException {
+        // Query [orth!="des" | orth!="ihres" | orth!="eines"]: by De Morgan's law,
+        // NOT(A) OR NOT(B) OR NOT(C) = NOT(A AND B AND C), i.e. a negative (nested) spanSegment
+        String json = getJsonString(getClass()
+                                    .getResource("/queries/segment/all-negation-in-group-or-three.jsonld")
+                                    .getFile());
+
+        KrillQuery kq = new KrillQuery("tokens");
+        SpanQueryWrapper sqwi = kq.fromKoral(json);
+        assertEquals(
+            "spanSegment(spanSegment(SpanMultiTermQueryWrapper(tokens:/s:des/), SpanMultiTermQueryWrapper(tokens:/s:ihres/)), SpanMultiTermQueryWrapper(tokens:/s:eines/))",
+            sqwi.toQuery().toString());
+        assertTrue(sqwi.isNegative());
+    };
+
     @Test
     public void queryJSONregexFail () {
         // "Leserin.{,3}"
diff --git a/src/test/resources/queries/segment/all-negation-in-group-or-three.jsonld b/src/test/resources/queries/segment/all-negation-in-group-or-three.jsonld
new file mode 100644
index 0000000..a350e5e
--- /dev/null
+++ b/src/test/resources/queries/segment/all-negation-in-group-or-three.jsonld
@@ -0,0 +1,36 @@
+{
+  "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+  "query": {
+    "@type": "koral:token",
+    "wrap": {
+      "@type": "koral:termGroup",
+      "operands": [
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "des",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        },
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "ihres",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        },
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "eines",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        }
+      ],
+      "relation": "relation:or"
+    }
+  }
+}
diff --git a/src/test/resources/queries/segment/all-negation-in-group-three.jsonld b/src/test/resources/queries/segment/all-negation-in-group-three.jsonld
new file mode 100644
index 0000000..70b436d
--- /dev/null
+++ b/src/test/resources/queries/segment/all-negation-in-group-three.jsonld
@@ -0,0 +1,36 @@
+{
+  "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+  "query": {
+    "@type": "koral:token",
+    "wrap": {
+      "@type": "koral:termGroup",
+      "operands": [
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "des",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        },
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "ihres",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        },
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "eines",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        }
+      ],
+      "relation": "relation:and"
+    }
+  }
+}
diff --git a/src/test/resources/queries/segment/all-negation-in-group.jsonld b/src/test/resources/queries/segment/all-negation-in-group.jsonld
new file mode 100644
index 0000000..6e2ceb6
--- /dev/null
+++ b/src/test/resources/queries/segment/all-negation-in-group.jsonld
@@ -0,0 +1,28 @@
+{
+  "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+  "query": {
+    "@type": "koral:token",
+    "wrap": {
+      "@type": "koral:termGroup",
+      "operands": [
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "des",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        },
+        {
+          "@type": "koral:term",
+          "foundry": "opennlp",
+          "key": "ihres",
+          "layer": "orth",
+          "match": "match:ne",
+          "type": "type:regex"
+        }
+      ],
+      "relation": "relation:and"
+    }
+  }
+}