| Nils Diewald | ea96950 | 2015-02-16 21:10:54 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.collection; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 2 | |
| margaretha | 1632381 | 2018-09-03 16:43:30 +0200 | [diff] [blame] | 3 | import java.io.File; |
| 4 | import java.io.FileInputStream; |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 5 | import java.io.IOException; |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 6 | import java.util.ArrayList; |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 7 | import java.util.HashMap; |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 8 | import java.util.Iterator; |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 9 | import java.util.Map; |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 10 | import java.util.Properties; |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 11 | |
| margaretha | 1632381 | 2018-09-03 16:43:30 +0200 | [diff] [blame] | 12 | import org.apache.commons.io.IOUtils; |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 13 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 14 | import org.apache.lucene.queries.TermsFilter; |
| 15 | import org.apache.lucene.search.Filter; |
| 16 | import org.apache.lucene.search.NumericRangeFilter; |
| 17 | import org.apache.lucene.search.PhraseQuery; |
| 18 | import org.apache.lucene.search.QueryWrapperFilter; |
| 19 | import org.apache.lucene.search.RegexpQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 20 | import org.slf4j.Logger; |
| 21 | import org.slf4j.LoggerFactory; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 22 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 23 | import de.ids_mannheim.korap.KrillCollection; |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 24 | import de.ids_mannheim.korap.index.TextPrependedTokenStream; |
| 25 | import de.ids_mannheim.korap.util.KrillDate; |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 26 | import de.ids_mannheim.korap.util.KrillProperties; |
| margaretha | 1632381 | 2018-09-03 16:43:30 +0200 | [diff] [blame] | 27 | import de.ids_mannheim.korap.util.QueryException; |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 28 | import net.sf.ehcache.Cache; |
| 29 | import net.sf.ehcache.CacheManager; |
| 30 | import net.sf.ehcache.Element; |
| 31 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 32 | |
| Akron | aa74ec6 | 2015-07-31 17:22:55 +0200 | [diff] [blame] | 33 | /* |
| 34 | * TODO: Optimize! |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 35 | * - Remove identical object in Boolean groups |
| 36 | * - Flatten boolean groups |
| 37 | * - create "between" ranges for multiple date objects |
| 38 | * |
| 39 | * TODO: |
| 40 | * - Filters are deprecated, they should be ported to queries |
| Akron | aa74ec6 | 2015-07-31 17:22:55 +0200 | [diff] [blame] | 41 | */ |
| 42 | |
| Nils Diewald | ea96950 | 2015-02-16 21:10:54 +0000 | [diff] [blame] | 43 | public class CollectionBuilder { |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 44 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 45 | public final static CacheManager cacheManager = CacheManager.newInstance(); |
| 46 | public final static Cache cache = cacheManager.getCache("named_vc"); |
| 47 | |
| 48 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 49 | // Logger |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 50 | private final static Logger log = LoggerFactory |
| 51 | .getLogger(KrillCollection.class); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 52 | |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 53 | // This advices the java compiler to ignore all loggings |
| 54 | public static final boolean DEBUG = false; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 55 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 56 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 57 | public CollectionBuilder.Interface term (String field, String term) { |
| 58 | return new CollectionBuilder.Term(field, term); |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 59 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 60 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 61 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 62 | public CollectionBuilder.Interface re (String field, String term) { |
| 63 | return new CollectionBuilder.Term(field, term, true); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 64 | }; |
| 65 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 66 | |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 67 | public CollectionBuilder.Interface text (String field, String text) { |
| 68 | return new CollectionBuilder.Text(field, text); |
| 69 | }; |
| 70 | |
| 71 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 72 | public CollectionBuilder.Interface since (String field, String date) { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 73 | int since = new KrillDate(date).floor(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 74 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 75 | if (since == 0 || since == KrillDate.BEGINNING) |
| 76 | return null; |
| 77 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 78 | return new CollectionBuilder.Range(field, since, KrillDate.END); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 79 | }; |
| 80 | |
| Akron | 5e3436f | 2017-07-04 15:28:03 +0200 | [diff] [blame] | 81 | public CollectionBuilder.Interface nothing () { |
| 82 | |
| 83 | // Requires that a field with name "0---" does not exist |
| 84 | return new CollectionBuilder.Term("0---", "0"); |
| 85 | }; |
| 86 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 87 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 88 | public CollectionBuilder.Interface till (String field, String date) { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 89 | try { |
| 90 | int till = new KrillDate(date).ceil(); |
| 91 | if (till == 0 || till == KrillDate.END) |
| 92 | return null; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 93 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 94 | return new CollectionBuilder.Range(field, KrillDate.BEGINNING, |
| 95 | till); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 96 | } |
| 97 | catch (NumberFormatException e) { |
| 98 | log.warn("Parameter of till(date) is invalid"); |
| 99 | }; |
| 100 | return null; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 101 | }; |
| 102 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 103 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 104 | // This will be optimized away in future versions |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 105 | public CollectionBuilder.Interface between (String field, String start, |
| 106 | String end) { |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 107 | CollectionBuilder.Interface startObj = this.since(field, start); |
| 108 | if (startObj == null) |
| 109 | return null; |
| 110 | |
| 111 | CollectionBuilder.Interface endObj = this.till(field, end); |
| 112 | if (endObj == null) |
| 113 | return null; |
| 114 | |
| 115 | return this.andGroup().with(startObj).with(endObj); |
| 116 | }; |
| 117 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 118 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 119 | public CollectionBuilder.Interface date (String field, String date) { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 120 | KrillDate dateDF = new KrillDate(date); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 121 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 122 | if (dateDF.year == 0) |
| 123 | return null; |
| 124 | |
| 125 | if (dateDF.day == 0 || dateDF.month == 0) { |
| 126 | int begin = dateDF.floor(); |
| 127 | int end = dateDF.ceil(); |
| 128 | |
| 129 | if (end == 0 |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 130 | || (begin == KrillDate.BEGINNING && end == KrillDate.END)) |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 131 | return null; |
| 132 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 133 | return new CollectionBuilder.Range(field, begin, end); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 134 | }; |
| 135 | |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 136 | return new CollectionBuilder.Range(field, dateDF.floor(), |
| 137 | dateDF.ceil()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 138 | }; |
| 139 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 140 | public CollectionBuilder.Interface referTo (String reference) { |
| 141 | return new CollectionBuilder.Reference(reference); |
| 142 | }; |
| 143 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 144 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 145 | public CollectionBuilder.Group andGroup () { |
| 146 | return new CollectionBuilder.Group(false); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 147 | }; |
| 148 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 149 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 150 | public CollectionBuilder.Group orGroup () { |
| 151 | return new CollectionBuilder.Group(true); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 152 | }; |
| 153 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 154 | public interface Interface { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 155 | public String toString (); |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 156 | |
| 157 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 158 | public Filter toFilter () throws QueryException; |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 159 | |
| 160 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 161 | public boolean isNegative (); |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 162 | |
| 163 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 164 | public CollectionBuilder.Interface not (); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 165 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 166 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 167 | public class Term implements CollectionBuilder.Interface { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 168 | private boolean isNegative = false; |
| 169 | private boolean regex = false; |
| 170 | private String field; |
| 171 | private String term; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 173 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 174 | public Term (String field, String term) { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 175 | this.field = field; |
| 176 | this.term = term; |
| 177 | }; |
| 178 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 179 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 180 | public Term (String field, String term, boolean regex) { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 181 | this.field = field; |
| 182 | this.term = term; |
| 183 | this.regex = regex; |
| 184 | }; |
| 185 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 186 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 187 | public Filter toFilter () { |
| 188 | // Regular expression |
| 189 | if (this.regex) |
| 190 | return new QueryWrapperFilter( |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 191 | new RegexpQuery(new org.apache.lucene.index.Term( |
| 192 | this.field, this.term))); |
| 193 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 194 | // Simple term |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 195 | return new TermsFilter( |
| 196 | new org.apache.lucene.index.Term(this.field, this.term)); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 197 | }; |
| 198 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 199 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 200 | public String toString () { |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 201 | Filter filter = this.toFilter(); |
| 202 | if (filter == null) |
| 203 | return ""; |
| 204 | return filter.toString(); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 205 | }; |
| 206 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 207 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 208 | public boolean isNegative () { |
| 209 | return this.isNegative; |
| 210 | }; |
| 211 | |
| 212 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 213 | public CollectionBuilder.Interface not () { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 214 | this.isNegative = true; |
| 215 | return this; |
| 216 | }; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 217 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 218 | |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 219 | |
| 220 | public class Text implements CollectionBuilder.Interface { |
| 221 | private boolean isNegative = false; |
| 222 | // private boolean regex = false; |
| 223 | private String field; |
| 224 | private String text; |
| 225 | |
| 226 | |
| 227 | public Text (String field, String text) { |
| 228 | this.field = field; |
| 229 | this.text = text; |
| 230 | }; |
| 231 | |
| 232 | // TODO: |
| 233 | // Currently this treatment is language specific and |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 234 | // does too much, I guess. |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 235 | public Filter toFilter () { |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 236 | PhraseQuery pq = new PhraseQuery(); |
| 237 | int pos = 0; |
| 238 | try { |
| Akron | 2620757 | 2018-04-04 20:21:42 +0200 | [diff] [blame] | 239 | TextPrependedTokenStream tpts = new TextPrependedTokenStream(this.text); |
| 240 | tpts.doNotPrepend(); |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 241 | CharTermAttribute term; |
| Akron | 2620757 | 2018-04-04 20:21:42 +0200 | [diff] [blame] | 242 | tpts.reset(); |
| 243 | while (tpts.incrementToken()) { |
| 244 | term = tpts.getAttribute(CharTermAttribute.class); |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 245 | pq.add(new org.apache.lucene.index.Term(this.field, term.toString()), pos++); |
| 246 | }; |
| Akron | 2620757 | 2018-04-04 20:21:42 +0200 | [diff] [blame] | 247 | tpts.close(); |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 248 | } |
| 249 | catch (IOException ie) { |
| 250 | System.err.println(ie); |
| 251 | return null; |
| 252 | }; |
| Akron | 2620757 | 2018-04-04 20:21:42 +0200 | [diff] [blame] | 253 | |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 254 | return new QueryWrapperFilter(pq); |
| 255 | }; |
| 256 | |
| 257 | |
| 258 | public String toString () { |
| 259 | Filter filter = this.toFilter(); |
| 260 | if (filter == null) |
| 261 | return ""; |
| 262 | return filter.toString(); |
| 263 | }; |
| 264 | |
| 265 | |
| 266 | public boolean isNegative () { |
| 267 | return this.isNegative; |
| 268 | }; |
| 269 | |
| 270 | |
| 271 | public CollectionBuilder.Interface not () { |
| 272 | this.isNegative = true; |
| 273 | return this; |
| 274 | }; |
| 275 | }; |
| 276 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 277 | |
| 278 | public class Reference implements CollectionBuilder.Interface { |
| 279 | private boolean isNegative = false; |
| 280 | private String reference; |
| 281 | private Map<Integer, DocBits> docIdMap = |
| 282 | new HashMap<Integer, DocBits>(); |
| 283 | |
| 284 | public Reference (String reference) { |
| 285 | this.reference = reference; |
| 286 | }; |
| 287 | |
| 288 | public Filter toFilter () throws QueryException { |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 289 | Element element = KrillCollection.cache.get(this.reference); |
| margaretha | 856d450 | 2018-09-04 14:47:45 +0200 | [diff] [blame] | 290 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 291 | if (element == null) { |
| margaretha | f96b503 | 2018-12-17 11:43:26 +0100 | [diff] [blame] | 292 | if (DEBUG) { |
| 293 | log.debug(reference + " is NOT found in the cache"); |
| 294 | } |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 295 | KrillCollection kc = new KrillCollection(); |
| 296 | |
| Akron | 65d57e9 | 2018-08-24 19:25:56 +0200 | [diff] [blame] | 297 | kc.fromStore(this.reference); |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 298 | |
| Akron | 65d57e9 | 2018-08-24 19:25:56 +0200 | [diff] [blame] | 299 | if (kc.hasErrors()) { |
| 300 | throw new QueryException( |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 301 | kc.getError(0).getCode(), |
| 302 | kc.getError(0).getMessage() |
| 303 | ); |
| 304 | }; |
| 305 | |
| 306 | return new ToCacheVCFilter( |
| 307 | this.reference, |
| 308 | docIdMap, |
| 309 | kc.getBuilder(), |
| 310 | kc.toFilter() |
| 311 | ); |
| 312 | } |
| 313 | else { |
| margaretha | f96b503 | 2018-12-17 11:43:26 +0100 | [diff] [blame] | 314 | if (DEBUG) { |
| 315 | log.debug(reference + " is FOUND in the cache."); |
| 316 | } |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 317 | CachedVCData cc = (CachedVCData) element.getObjectValue(); |
| 318 | return new CachedVCFilter(this.reference, cc); |
| 319 | } |
| 320 | }; |
| 321 | |
| 322 | |
| 323 | public String toString () { |
| 324 | return "referTo(" + this.reference + ")"; |
| 325 | }; |
| 326 | |
| 327 | |
| 328 | public boolean isNegative () { |
| 329 | return this.isNegative; |
| 330 | }; |
| 331 | |
| 332 | |
| 333 | public CollectionBuilder.Interface not () { |
| 334 | this.isNegative = true; |
| 335 | return this; |
| 336 | }; |
| 337 | |
| 338 | private String loadVCFile (String ref) { |
| 339 | Properties prop = KrillProperties.loadDefaultProperties(); |
| 340 | if (prop == null){ |
| 341 | /* |
| 342 | this.addError(StatusCodes.MISSING_KRILL_PROPERTIES, |
| 343 | "krill.properties is not found."); |
| 344 | */ |
| 345 | return null; |
| 346 | } |
| 347 | |
| 348 | String namedVCPath = prop.getProperty("krill.namedVC"); |
| 349 | if (!namedVCPath.endsWith("/")){ |
| 350 | namedVCPath += "/"; |
| 351 | } |
| 352 | File file = new File(namedVCPath+ref+".jsonld"); |
| 353 | |
| 354 | String json = null; |
| 355 | try { |
| 356 | FileInputStream fis = new FileInputStream(file); |
| 357 | json = IOUtils.toString(fis); |
| 358 | } |
| 359 | catch (IOException e) { |
| 360 | /* |
| 361 | this.addError(StatusCodes.MISSING_COLLECTION, |
| 362 | "Collection is not found."); |
| 363 | */ |
| 364 | return null; |
| 365 | } |
| 366 | return json; |
| 367 | } |
| 368 | }; |
| 369 | |
| Akron | 408ae35 | 2018-03-28 16:47:41 +0200 | [diff] [blame] | 370 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 371 | public class Group implements CollectionBuilder.Interface { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 372 | private boolean isOptional = false; |
| margaretha | 8a8c427 | 2018-08-21 17:39:27 +0200 | [diff] [blame] | 373 | private boolean isNegative = false; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 374 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 375 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 376 | public boolean isNegative () { |
| 377 | return this.isNegative; |
| 378 | }; |
| 379 | |
| margaretha | 8a8c427 | 2018-08-21 17:39:27 +0200 | [diff] [blame] | 380 | public void setNegative (boolean isNegative) { |
| 381 | this.isNegative = isNegative; |
| 382 | } |
| 383 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 384 | public boolean isOptional () { |
| 385 | return this.isOptional; |
| 386 | }; |
| 387 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 388 | private ArrayList<CollectionBuilder.Interface> operands; |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 389 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 390 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 391 | public Group (boolean optional) { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 392 | this.isOptional = optional; |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 393 | this.operands = new ArrayList<CollectionBuilder.Interface>(3); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 394 | }; |
| 395 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 396 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 397 | public Group with (CollectionBuilder.Interface cb) { |
| Akron | fd05f50 | 2015-07-30 18:34:26 +0200 | [diff] [blame] | 398 | if (cb == null) |
| 399 | return this; |
| 400 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 401 | this.operands.add(cb); |
| 402 | return this; |
| 403 | }; |
| 404 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 405 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 406 | public Group with (String field, String term) { |
| 407 | if (field == null || term == null) |
| 408 | return this; |
| 409 | return this.with(new CollectionBuilder.Term(field, term)); |
| 410 | }; |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 411 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 412 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 413 | public Filter toFilter () throws QueryException { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 414 | if (this.operands == null || this.operands.isEmpty()) |
| 415 | return null; |
| 416 | |
| 417 | if (this.operands.size() == 1) |
| 418 | return this.operands.get(0).toFilter(); |
| 419 | |
| 420 | // BooleanFilter bool = new BooleanFilter(); |
| 421 | BooleanGroupFilter bool = new BooleanGroupFilter(this.isOptional); |
| 422 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 423 | Iterator<CollectionBuilder.Interface> i = this.operands.iterator(); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 424 | while (i.hasNext()) { |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 425 | CollectionBuilder.Interface cb = i.next(); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 426 | if (cb.isNegative()) { |
| 427 | bool.without(cb.toFilter()); |
| 428 | } |
| 429 | else { |
| 430 | bool.with(cb.toFilter()); |
| 431 | }; |
| 432 | }; |
| 433 | |
| 434 | return bool; |
| 435 | }; |
| 436 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 437 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 438 | public String toString () { |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 439 | try { |
| 440 | Filter filter = this.toFilter(); |
| 441 | if (filter == null) |
| 442 | return ""; |
| 443 | return filter.toString(); |
| 444 | } |
| 445 | catch (QueryException qe) { |
| 446 | log.warn(qe.getLocalizedMessage()); |
| 447 | }; |
| 448 | return ""; |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 449 | }; |
| 450 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 451 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 452 | public CollectionBuilder.Interface not () { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 453 | this.isNegative = true; |
| 454 | return this; |
| 455 | }; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 456 | }; |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 457 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 458 | public class Range implements CollectionBuilder.Interface { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 459 | private boolean isNegative = false; |
| 460 | private String field; |
| 461 | private int start, end; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 462 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 463 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 464 | public Range (String field, int start, int end) { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 465 | this.field = field; |
| 466 | this.start = start; |
| 467 | this.end = end; |
| 468 | }; |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 469 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 470 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 471 | public boolean isNegative () { |
| 472 | return this.isNegative; |
| 473 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 474 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 475 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 476 | public String toString () { |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 477 | Filter filter = this.toFilter(); |
| 478 | if (filter == null) |
| 479 | return ""; |
| 480 | return filter.toString(); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 481 | }; |
| Nils Diewald | 8db8f92 | 2014-10-24 17:43:13 +0000 | [diff] [blame] | 482 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 483 | |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 484 | public Filter toFilter () { |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 485 | return NumericRangeFilter.newIntRange(this.field, this.start, |
| 486 | this.end, true, true); |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 487 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 488 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 489 | |
| Akron | 60dfa7e | 2015-08-03 22:15:17 +0200 | [diff] [blame] | 490 | public CollectionBuilder.Interface not () { |
| Akron | 176c9b1 | 2015-07-29 19:53:40 +0200 | [diff] [blame] | 491 | this.isNegative = true; |
| 492 | return this; |
| 493 | }; |
| Nils Diewald | fb4d7b0 | 2014-04-09 17:56:17 +0000 | [diff] [blame] | 494 | }; |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 495 | |
| margaretha | df0e9d1 | 2018-07-30 16:22:59 +0200 | [diff] [blame] | 496 | /** Builder for virtual corpus / collection existing in the cache |
| 497 | * |
| 498 | * @author margaretha |
| 499 | * |
| 500 | */ |
| margaretha | 85ee2ac | 2018-07-25 17:58:09 +0200 | [diff] [blame] | 501 | public class CachedVC implements CollectionBuilder.Interface { |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 502 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 503 | private String cacheKey; |
| margaretha | 85ee2ac | 2018-07-25 17:58:09 +0200 | [diff] [blame] | 504 | private CachedVCData cachedCollection; |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 505 | private boolean isNegative = false; |
| 506 | |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 507 | public CachedVC (String vcRef, CachedVCData cc) { |
| 508 | this.cacheKey = vcRef; |
| 509 | this.cachedCollection = cc; |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 510 | } |
| 511 | |
| 512 | @Override |
| 513 | public Filter toFilter () { |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 514 | return new CachedVCFilter(this.cacheKey, cachedCollection); |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 515 | } |
| 516 | |
| 517 | @Override |
| 518 | public boolean isNegative () { |
| 519 | return this.isNegative; |
| 520 | } |
| 521 | |
| 522 | @Override |
| 523 | public CollectionBuilder.Interface not () { |
| 524 | this.isNegative = true; |
| 525 | return this; |
| 526 | } |
| 527 | |
| 528 | } |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 529 | |
| 530 | /** Wraps a sub CollectionBuilder.Interface to allows VC caching |
| 531 | * |
| 532 | * @author margaretha |
| 533 | * |
| 534 | */ |
| 535 | public class ToCacheVC implements CollectionBuilder.Interface { |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 536 | |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 537 | private CollectionBuilder.Interface child; |
| 538 | private String cacheKey; |
| 539 | |
| margaretha | df0e9d1 | 2018-07-30 16:22:59 +0200 | [diff] [blame] | 540 | private Map<Integer, DocBits> docIdMap; |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 541 | |
| 542 | public ToCacheVC (String vcRef, Interface cbi) { |
| 543 | this.child = cbi; |
| 544 | this.cacheKey = vcRef; |
| margaretha | df0e9d1 | 2018-07-30 16:22:59 +0200 | [diff] [blame] | 545 | this.docIdMap = new HashMap<Integer, DocBits>(); |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 546 | } |
| 547 | |
| 548 | @Override |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 549 | public Filter toFilter () throws QueryException { |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 550 | return new ToCacheVCFilter(cacheKey,docIdMap, child, child.toFilter()); |
| 551 | } |
| 552 | |
| 553 | @Override |
| 554 | public boolean isNegative () { |
| 555 | return child.isNegative(); |
| 556 | } |
| 557 | |
| 558 | @Override |
| 559 | public CollectionBuilder.Interface not () { |
| 560 | // not supported |
| 561 | return this; |
| 562 | } |
| 563 | } |
| Akron | b59f40e | 2018-08-23 17:15:43 +0200 | [diff] [blame] | 564 | |
| 565 | // Maybe irrelevant |
| 566 | public Interface namedVC (String vcRef, CachedVCData cc) { |
| 567 | return new CollectionBuilder.CachedVC(vcRef, cc); |
| margaretha | 8efa375 | 2018-07-24 17:46:43 +0200 | [diff] [blame] | 568 | } |
| margaretha | fe25280 | 2018-07-30 14:59:50 +0200 | [diff] [blame] | 569 | |
| 570 | public Interface toCacheVC (String vcRef, Interface cbi) { |
| 571 | return new CollectionBuilder.ToCacheVC(vcRef, cbi); |
| 572 | } |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 573 | }; |