| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap; |
| 2 | |
| 3 | import java.util.*; |
| 4 | import java.io.IOException; |
| 5 | import org.apache.lucene.search.QueryWrapperFilter; |
| 6 | import org.apache.lucene.search.NumericRangeFilter; |
| 7 | import org.apache.lucene.search.Filter; |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 8 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 9 | import de.ids_mannheim.korap.KorapIndex; |
| 10 | import de.ids_mannheim.korap.KorapResult; |
| 11 | import de.ids_mannheim.korap.KorapFilter; |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 12 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 13 | import de.ids_mannheim.korap.util.KorapDate; |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.util.QueryException; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 15 | import de.ids_mannheim.korap.filter.BooleanFilter; |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 16 | import de.ids_mannheim.korap.filter.FilterOperation; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 17 | import org.apache.lucene.search.spans.SpanQuery; |
| 18 | import org.apache.lucene.search.Query; |
| 19 | import org.apache.lucene.search.FilteredQuery; |
| 20 | import org.apache.lucene.index.AtomicReaderContext; |
| 21 | import org.apache.lucene.util.FixedBitSet; |
| 22 | import org.apache.lucene.util.Bits; |
| 23 | import org.apache.lucene.search.DocIdSetIterator; |
| 24 | import org.apache.lucene.search.DocIdSet; |
| 25 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 26 | import com.fasterxml.jackson.databind.ObjectMapper; |
| 27 | import com.fasterxml.jackson.databind.JsonNode; |
| 28 | |
| Nils Diewald | 2276e1c | 2014-04-10 15:01:59 +0000 | [diff] [blame] | 29 | import java.io.StringWriter; |
| 30 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 31 | import org.slf4j.Logger; |
| 32 | import org.slf4j.LoggerFactory; |
| 33 | |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 34 | // TODO: Make a cache for the bits!!! DELETE IT IN CASE OF AN EXTENSION OR A FILTER! |
| Nils Diewald | 6802acd | 2014-03-18 18:29:30 +0000 | [diff] [blame] | 35 | // TODO: Maybe use FilteredQuery.RandomAccessFilterStrategy |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 36 | // TODO: Maybe a constantScoreQuery can make things faster? |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 37 | |
| 38 | // accepts as first parameter the index |
| 39 | // THIS MAY CHANGE for stuff like combining virtual collections |
| 40 | // See http://mail-archives.apache.org/mod_mbox/lucene-java-user/200805.mbox/%3C17080852.post@talk.nabble.com%3E |
| 41 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 42 | public class KorapCollection { |
| 43 | private KorapIndex index; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 44 | private KorapDate created; |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 45 | private String id; |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 46 | private String error; |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 47 | private ArrayList<FilterOperation> filter; |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 48 | private int filterCount = 0; |
| 49 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 50 | // Logger |
| 51 | private final static Logger log = LoggerFactory.getLogger(KorapCollection.class); |
| 52 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 53 | // user? |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 54 | public KorapCollection (KorapIndex ki) { |
| 55 | this.index = ki; |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 56 | this.filter = new ArrayList<FilterOperation>(5); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 57 | }; |
| 58 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 59 | public KorapCollection (String jsonString) { |
| 60 | this.filter = new ArrayList<FilterOperation>(5); |
| 61 | ObjectMapper mapper = new ObjectMapper(); |
| Nils Diewald | 2276e1c | 2014-04-10 15:01:59 +0000 | [diff] [blame] | 62 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 63 | try { |
| 64 | JsonNode json = mapper.readValue(jsonString, JsonNode.class); |
| Nils Diewald | 23417e8 | 2014-02-12 18:33:24 +0000 | [diff] [blame] | 65 | if (json.has("collections")) { |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 66 | log.trace("Add meta collection"); |
| Nils Diewald | 23417e8 | 2014-02-12 18:33:24 +0000 | [diff] [blame] | 67 | for (JsonNode collection : json.get("collections")) { |
| 68 | this.fromJSON(collection); |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 69 | }; |
| 70 | }; |
| 71 | } |
| 72 | catch (Exception e) { |
| 73 | this.error = e.getMessage(); |
| 74 | }; |
| 75 | }; |
| 76 | |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 77 | public KorapCollection () { |
| 78 | this.filter = new ArrayList<FilterOperation>(5); |
| 79 | }; |
| 80 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 81 | public void fromJSON(JsonNode json) throws QueryException { |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 82 | String type = json.get("@type").asText(); |
| 83 | |
| 84 | if (type.equals("korap:meta-filter")) { |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 85 | log.trace("Add Filter"); |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 86 | this.filter(new KorapFilter(json.get("@value"))); |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 87 | } |
| 88 | else if (type.equals("korap:meta-extend")) { |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 89 | log.trace("Add Extend"); |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 90 | this.extend(new KorapFilter(json.get("@value"))); |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 91 | }; |
| 92 | }; |
| 93 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 94 | public int getCount() { |
| 95 | return this.filterCount; |
| 96 | }; |
| 97 | |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 98 | public void setIndex (KorapIndex ki) { |
| 99 | this.index = ki; |
| 100 | }; |
| 101 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 102 | // The checks asre not necessary |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 103 | public KorapCollection filter (BooleanFilter filter) { |
| Nils Diewald | 9f31083 | 2013-12-06 22:38:55 +0000 | [diff] [blame] | 104 | log.trace("Added filter: {}", filter.toString()); |
| 105 | if (filter == null) { |
| 106 | log.warn("No filter is given"); |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 107 | return this; |
| Nils Diewald | 9f31083 | 2013-12-06 22:38:55 +0000 | [diff] [blame] | 108 | }; |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 109 | Filter f = (Filter) new QueryWrapperFilter(filter.toQuery()); |
| Nils Diewald | 9f31083 | 2013-12-06 22:38:55 +0000 | [diff] [blame] | 110 | if (f == null) { |
| 111 | log.warn("Filter can't be wrapped"); |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 112 | return this; |
| Nils Diewald | 9f31083 | 2013-12-06 22:38:55 +0000 | [diff] [blame] | 113 | }; |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 114 | FilterOperation fo = new FilterOperation(f,false); |
| Nils Diewald | 9f31083 | 2013-12-06 22:38:55 +0000 | [diff] [blame] | 115 | if (fo == null) { |
| 116 | log.warn("Filter operation invalid"); |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 117 | return this; |
| Nils Diewald | 9f31083 | 2013-12-06 22:38:55 +0000 | [diff] [blame] | 118 | }; |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 119 | this.filter.add(fo); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 120 | this.filterCount++; |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 121 | return this; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 122 | }; |
| 123 | |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 124 | public KorapCollection filter (KorapFilter filter) { |
| 125 | return this.filter(filter.toBooleanFilter()); |
| 126 | }; |
| 127 | |
| 128 | |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 129 | public KorapCollection extend (BooleanFilter filter) { |
| Nils Diewald | 9f31083 | 2013-12-06 22:38:55 +0000 | [diff] [blame] | 130 | log.trace("Added extension: {}", filter.toString()); |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 131 | this.filter.add( |
| 132 | new FilterOperation( |
| Nils Diewald | 9cc86fe | 2013-12-07 17:45:59 +0000 | [diff] [blame] | 133 | (Filter) new QueryWrapperFilter(filter.toQuery()), |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 134 | true |
| 135 | ) |
| 136 | ); |
| 137 | this.filterCount++; |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 138 | return this; |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 139 | }; |
| 140 | |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 141 | public KorapCollection extend (KorapFilter filter) { |
| 142 | return this.extend(filter.toBooleanFilter()); |
| 143 | }; |
| 144 | |
| 145 | |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 146 | public ArrayList<FilterOperation> getFilters () { |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 147 | return this.filter; |
| 148 | }; |
| 149 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 150 | public FilterOperation getFilter (int i) { |
| 151 | return this.filter.get(i); |
| 152 | }; |
| 153 | |
| 154 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 155 | public String toString () { |
| Nils Diewald | 2276e1c | 2014-04-10 15:01:59 +0000 | [diff] [blame] | 156 | StringBuilder sb = new StringBuilder(); |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 157 | for (FilterOperation fo : this.filter) { |
| 158 | sb.append(fo.toString()).append("; "); |
| 159 | }; |
| 160 | return sb.toString(); |
| 161 | }; |
| 162 | |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 163 | // DEPRECATED BUT USED IN TEST CASES |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 164 | public KorapResult search (SpanQuery query) { |
| Nils Diewald | 3ef9a47 | 2013-12-02 16:06:09 +0000 | [diff] [blame] | 165 | return this.index.search(this, query, 0, (short) 20, true, (short) 5, true, (short) 5); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 166 | }; |
| 167 | |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 168 | public FixedBitSet bits (AtomicReaderContext atomic) throws IOException { |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 169 | |
| 170 | /* |
| Nils Diewald | 41e58f8 | 2013-11-20 20:30:15 +0000 | [diff] [blame] | 171 | Use Bits.MatchAllBits(int len) |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 172 | */ |
| 173 | |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 174 | boolean noDoc = true; |
| 175 | FixedBitSet bitset; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 176 | |
| 177 | if (this.filterCount > 0) { |
| Nils Diewald | 2276e1c | 2014-04-10 15:01:59 +0000 | [diff] [blame] | 178 | bitset = new FixedBitSet(atomic.reader().maxDoc()); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 179 | |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 180 | ArrayList<FilterOperation> filters = (ArrayList<FilterOperation>) this.filter.clone(); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 181 | |
| Nils Diewald | 22efd2d | 2013-11-29 22:54:24 +0000 | [diff] [blame] | 182 | FilterOperation kcInit = filters.remove(0); |
| 183 | log.trace("FILTER: {}", kcInit); |
| 184 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 185 | // Init vector |
| Nils Diewald | 22efd2d | 2013-11-29 22:54:24 +0000 | [diff] [blame] | 186 | DocIdSet docids = kcInit.filter.getDocIdSet(atomic, null); |
| Nils Diewald | 9cc86fe | 2013-12-07 17:45:59 +0000 | [diff] [blame] | 187 | |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 188 | DocIdSetIterator filterIter = docids.iterator(); |
| 189 | |
| 190 | if (filterIter != null) { |
| Nils Diewald | 22efd2d | 2013-11-29 22:54:24 +0000 | [diff] [blame] | 191 | log.trace("InitFilter has effect"); |
| Nils Diewald | 9cc86fe | 2013-12-07 17:45:59 +0000 | [diff] [blame] | 192 | // System.err.println("Init has an effect"); |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 193 | bitset.or(filterIter); |
| 194 | noDoc = false; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 195 | }; |
| 196 | |
| Nils Diewald | 41e58f8 | 2013-11-20 20:30:15 +0000 | [diff] [blame] | 197 | if (!noDoc) { |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 198 | for (FilterOperation kc : filters) { |
| Nils Diewald | 41e58f8 | 2013-11-20 20:30:15 +0000 | [diff] [blame] | 199 | log.trace("FILTER: {}", kc); |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 200 | |
| 201 | // BUG!!! |
| 202 | docids = kc.filter.getDocIdSet(atomic, kc.isExtension() ? null : bitset); |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 203 | filterIter = docids.iterator(); |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 204 | |
| Nils Diewald | 41e58f8 | 2013-11-20 20:30:15 +0000 | [diff] [blame] | 205 | if (filterIter == null) { |
| 206 | // There must be a better way ... |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 207 | if (kc.isFilter()) { |
| Nils Diewald | 2276e1c | 2014-04-10 15:01:59 +0000 | [diff] [blame] | 208 | // TODO: Check if this is really correct! |
| 209 | // Maybe here is the bug |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 210 | bitset.clear(0, bitset.length()); |
| 211 | noDoc = true; |
| Nils Diewald | 9cc86fe | 2013-12-07 17:45:59 +0000 | [diff] [blame] | 212 | } |
| 213 | else { |
| 214 | // System.err.println("No term found"); |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 215 | }; |
| 216 | continue; |
| Nils Diewald | 41e58f8 | 2013-11-20 20:30:15 +0000 | [diff] [blame] | 217 | }; |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 218 | if (kc.isExtension()) { |
| Nils Diewald | 9cc86fe | 2013-12-07 17:45:59 +0000 | [diff] [blame] | 219 | // System.err.println("Term found!"); |
| 220 | // log.trace("Extend filter"); |
| 221 | // System.err.println("Old Card:" + bitset.cardinality()); |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 222 | bitset.or(filterIter); |
| Nils Diewald | 9cc86fe | 2013-12-07 17:45:59 +0000 | [diff] [blame] | 223 | // System.err.println("New Card:" + bitset.cardinality()); |
| Nils Diewald | 5def8bc | 2013-11-28 19:26:54 +0000 | [diff] [blame] | 224 | } |
| 225 | else { |
| 226 | bitset.and(filterIter); |
| 227 | }; |
| Nils Diewald | 41e58f8 | 2013-11-20 20:30:15 +0000 | [diff] [blame] | 228 | }; |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 229 | |
| 230 | if (!noDoc) { |
| 231 | FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs(); |
| 232 | if (livedocs != null) { |
| 233 | bitset.and(livedocs); |
| 234 | }; |
| 235 | }; |
| 236 | } |
| 237 | else { |
| 238 | return bitset; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 239 | }; |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 240 | } |
| 241 | else { |
| 242 | bitset = (FixedBitSet) atomic.reader().getLiveDocs(); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 243 | }; |
| 244 | |
| 245 | return bitset; |
| 246 | }; |
| 247 | |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 248 | public long numberOf (String foundry, String type) throws IOException { |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 249 | if (this.index == null) |
| 250 | return (long) 0; |
| 251 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 252 | return this.index.numberOf(this, foundry, type); |
| 253 | }; |
| 254 | |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 255 | public long numberOf (String type) throws IOException { |
| Nils Diewald | c925b49 | 2013-12-03 23:56:10 +0000 | [diff] [blame] | 256 | if (this.index == null) |
| 257 | return (long) 0; |
| 258 | |
| Nils Diewald | dfb21ea | 2013-11-21 14:26:47 +0000 | [diff] [blame] | 259 | return this.index.numberOf(this, "tokens", type); |
| 260 | }; |
| 261 | |
| Nils Diewald | 2276e1c | 2014-04-10 15:01:59 +0000 | [diff] [blame] | 262 | // This is only for testing purposes! |
| 263 | public HashMap getTermRelation(String field) throws Exception { |
| 264 | if (this.index == null) { |
| 265 | HashMap<String,Long> map = new HashMap<>(1); |
| 266 | map.put("-docs", (long) 0); |
| 267 | return map; |
| 268 | }; |
| 269 | |
| 270 | return this.index.getTermRelation(this, field); |
| 271 | }; |
| 272 | |
| 273 | public String getTermRelationJSON(String field) throws IOException { |
| 274 | ObjectMapper mapper = new ObjectMapper(); |
| 275 | StringWriter sw = new StringWriter(); |
| 276 | sw.append("{\"field\":"); |
| 277 | mapper.writeValue(sw,field); |
| 278 | sw.append(","); |
| 279 | |
| 280 | try { |
| 281 | HashMap<String, Long> map = this.getTermRelation(field); |
| 282 | |
| 283 | sw.append("\"documents\":"); |
| 284 | mapper.writeValue(sw,map.remove("-docs")); |
| 285 | sw.append(","); |
| 286 | |
| 287 | String[] keys = map.keySet().toArray(new String[map.size()]); |
| 288 | |
| 289 | HashMap<String,Integer> setHash = new HashMap<>(20); |
| 290 | ArrayList<HashMap<String,Long>> set = new ArrayList<>(20); |
| 291 | ArrayList<Long[]> overlap = new ArrayList<>(100); |
| 292 | |
| 293 | int count = 0; |
| 294 | for (String key : keys) { |
| 295 | if (!key.startsWith("#__")) { |
| 296 | HashMap<String,Long> simpleMap = new HashMap<>(); |
| 297 | simpleMap.put(key, map.remove(key)); |
| 298 | set.add(simpleMap); |
| 299 | setHash.put(key, count++); |
| 300 | }; |
| 301 | }; |
| 302 | |
| 303 | keys = map.keySet().toArray(new String[map.size()]); |
| 304 | for (String key : keys) { |
| 305 | String[] comb = key.substring(3).split(":###:"); |
| 306 | Long[] l = new Long[3]; |
| 307 | l[0] = (long) setHash.get(comb[0]); |
| 308 | l[1] = (long) setHash.get(comb[1]); |
| 309 | l[2] = map.remove(key); |
| 310 | overlap.add(l); |
| 311 | }; |
| 312 | |
| 313 | |
| 314 | sw.append("\"sets\":"); |
| 315 | mapper.writeValue(sw, (Object) set); |
| 316 | sw.append(",\"overlaps\":"); |
| 317 | mapper.writeValue(sw, (Object) overlap); |
| 318 | sw.append(",\"error\":null"); |
| 319 | |
| 320 | } |
| 321 | catch (Exception e) { |
| 322 | sw.append("\"error\":"); |
| 323 | mapper.writeValue(sw,e.getMessage()); |
| 324 | }; |
| 325 | |
| 326 | sw.append("}"); |
| 327 | return sw.getBuffer().toString(); |
| 328 | }; |
| 329 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 330 | public String getError () { |
| 331 | return this.error; |
| 332 | }; |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 333 | }; |