| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.spans; |
| 2 | |
| 3 | import static de.ids_mannheim.korap.util.KrillByte.byte2int; |
| 4 | |
| 5 | import java.io.IOException; |
| 6 | import java.util.BitSet; |
| 7 | import java.util.Map; |
| 8 | |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 9 | import org.apache.lucene.index.LeafReaderContext; |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 10 | import org.apache.lucene.index.Term; |
| 11 | import org.apache.lucene.index.TermContext; |
| 12 | import org.apache.lucene.util.Bits; |
| 13 | |
| 14 | import de.ids_mannheim.korap.query.SpanClassFilterQuery; |
| 15 | import de.ids_mannheim.korap.query.SpanClassFilterQuery.ClassOperation; |
| 16 | |
| 17 | public class ClassFilteredSpans extends SimpleSpans { |
| 18 | |
| 19 | private BitSet bitset1, bitset2; |
| 20 | private ClassOperation operation; |
| 21 | private byte classNum1, classNum2; |
| 22 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 23 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 24 | public ClassFilteredSpans (SpanClassFilterQuery query, |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 25 | LeafReaderContext context, Bits acceptDocs, |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 26 | Map<Term, TermContext> termContexts) |
| 27 | throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 28 | super(query, context, acceptDocs, termContexts); |
| 29 | this.operation = query.getOperation(); |
| 30 | this.classNum1 = query.getClassNum1(); |
| 31 | this.classNum2 = query.getClassNum2(); |
| 32 | hasMoreSpans = firstSpans.next(); |
| 33 | } |
| 34 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 35 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 36 | @Override |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 37 | public boolean next () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 38 | while (hasMoreSpans) { |
| 39 | matchPayload.clear(); |
| 40 | bitset1 = null; |
| 41 | bitset2 = null; |
| 42 | if (isClassOperationValid()) { |
| 43 | this.matchStartPosition = firstSpans.start(); |
| 44 | this.matchEndPosition = firstSpans.end(); |
| 45 | this.matchDocNumber = firstSpans.doc(); |
| 46 | this.matchPayload.addAll(firstSpans.getPayload()); |
| 47 | hasMoreSpans = firstSpans.next(); |
| 48 | return true; |
| 49 | } |
| 50 | hasMoreSpans = firstSpans.next(); |
| 51 | } |
| 52 | return false; |
| 53 | } |
| 54 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 55 | |
| 56 | private boolean isClassOperationValid () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 57 | setBitsets(); |
| Akron | c5e73b8 | 2016-01-14 17:01:18 +0100 | [diff] [blame] | 58 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 59 | int cardinality = Math.max(bitset1.cardinality(), |
| 60 | bitset2.cardinality()); |
| Akron | c5e73b8 | 2016-01-14 17:01:18 +0100 | [diff] [blame] | 61 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 62 | bitset1.and(bitset2); |
| 63 | // System.out.println("cardinality:" + cardinality); |
| 64 | switch (operation) { |
| 65 | case DISJOINT: |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 66 | if (bitset1.cardinality() == 0) |
| 67 | return true; |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 68 | break; |
| 69 | case EQUAL: |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 70 | if (cardinality == bitset1.cardinality()) |
| 71 | return true; |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 72 | break; |
| 73 | case DIFFER: |
| 74 | if (cardinality == 0 || cardinality != bitset1.cardinality()) |
| 75 | return true; |
| 76 | break; |
| 77 | case INCLUDE: |
| 78 | if (bitset1.cardinality() == bitset2.cardinality()) { |
| 79 | return true; |
| 80 | } |
| 81 | break; |
| 82 | case INTERSECT: |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 83 | if (bitset1.cardinality() > 0) |
| 84 | return true; |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 85 | break; |
| 86 | } |
| 87 | |
| 88 | return false; |
| 89 | } |
| 90 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 91 | |
| 92 | private void setBitsets () throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 93 | BitSet bs = new BitSet(); |
| 94 | int start, end; |
| 95 | // System.out.println("------------------------"); |
| 96 | for (byte[] payload : firstSpans.getPayload()) { |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 97 | /* |
| Akron | c5e73b8 | 2016-01-14 17:01:18 +0100 | [diff] [blame] | 98 | System.err.println( |
| 99 | "** " + |
| 100 | payload[0] + |
| 101 | "|" + |
| 102 | byte2int(payload, 1) + |
| 103 | "|" + |
| 104 | byte2int(payload, 5) + |
| 105 | "|" + |
| 106 | payload[8] + |
| 107 | " **"); |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 108 | */ |
| Akron | c5e73b8 | 2016-01-14 17:01:18 +0100 | [diff] [blame] | 109 | if (payload[0] == 0) { |
| 110 | start = byte2int(payload, 1) + 1; |
| 111 | end = byte2int(payload, 5) + 1; |
| 112 | if (payload[9] == classNum1) { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 113 | // System.out.println("bitset1 " + start + " " + |
| 114 | // end); |
| 115 | if (bitset1 == null) { |
| 116 | bitset1 = new BitSet(); |
| 117 | bitset1.set(start, end); |
| 118 | } |
| 119 | else { |
| 120 | bs.set(start, end); |
| 121 | bitset1.or(bs); |
| 122 | } |
| 123 | // System.out.println(bitset1); |
| 124 | } |
| Akron | c5e73b8 | 2016-01-14 17:01:18 +0100 | [diff] [blame] | 125 | else if (payload[9] == classNum2) { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 126 | // System.out.println("#bitset2 " + start + " " + |
| 127 | // end); |
| 128 | if (bitset2 == null) { |
| 129 | bitset2 = new BitSet(); |
| 130 | bitset2.set(start, end); |
| 131 | } |
| 132 | else { |
| 133 | bs.set(start, end); |
| 134 | bitset2.or(bs); |
| 135 | // System.out.println("OR #2"); |
| 136 | } |
| 137 | // System.out.println(bitset2); |
| 138 | } |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | } |
| 143 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 144 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 145 | @Override |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 146 | public boolean skipTo (int target) throws IOException { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 147 | // TODO Auto-generated method stub |
| 148 | return false; |
| 149 | } |
| 150 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 151 | |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 152 | @Override |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 153 | public long cost () { |
| margaretha | 9fbffa9 | 2015-05-12 18:25:23 +0200 | [diff] [blame] | 154 | // TODO Auto-generated method stub |
| 155 | return 0; |
| 156 | } |
| 157 | } |