| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query; |
| 2 | |
| 3 | import java.io.IOException; |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 4 | import java.util.Arrays; |
| 5 | import java.util.List; |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 6 | import java.util.Map; |
| 7 | |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 8 | import org.apache.lucene.index.LeafReaderContext; |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 9 | import org.apache.lucene.index.Term; |
| 10 | import org.apache.lucene.index.TermContext; |
| 11 | import org.apache.lucene.search.spans.SpanQuery; |
| 12 | import org.apache.lucene.search.spans.Spans; |
| 13 | import org.apache.lucene.util.Bits; |
| 14 | import org.apache.lucene.util.ToStringUtils; |
| 15 | |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 16 | import de.ids_mannheim.korap.constants.RelationDirection; |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 17 | import de.ids_mannheim.korap.query.spans.RelationSpans; |
| 18 | |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 19 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 20 | * SpanRelationQuery retrieves spans representing a relation between |
| Eliza Margaretha | dc98dc1 | 2016-11-16 14:33:42 +0100 | [diff] [blame] | 21 | * tokens, elements, or a-token-and-an-element. Relation are marked |
| 22 | * with prefix "<" or ">". The direction of the angle bracket |
| 23 | * represents the direction of the corresponding relation. By default, |
| 24 | * the relation is set ">". |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 25 | * <br/><br/> |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 26 | * |
| 27 | * This class provides two types of query: |
| 28 | * <ol> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 29 | * <li>querying any relations, for instance dependency relation |
| 30 | * "<:xip/syntax-dep_rel". |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 31 | * |
| 32 | * <pre>SpanRelationQuery sq = new SpanRelationQuery( |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 33 | * new SpanTermQuery( |
| 34 | * new Term("tokens","<:xip/syntax-dep_rel")), |
| 35 | * true); |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 36 | * </pre> |
| 37 | * </li> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 38 | * <li>querying relations matching a certain type of sources/targets, |
| Eliza Margaretha | dc98dc1 | 2016-11-16 14:33:42 +0100 | [diff] [blame] | 39 | * that are the left or the right sides of the relations. This query |
| 40 | * is used within {@link SpanRelationPartQuery}, for instance, to |
| 41 | * retrieve all dependency relations "<:xip/syntax-dep_rel" whose |
| 42 | * sources (right side) are noun phrases. |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 43 | * <pre> |
| 44 | * SpanRelationPartQuery rv = |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 45 | * new SpanRelationPartQuery(sq, new SpanElementQuery("tokens","np"), |
| 46 | * true, |
| 47 | * false, true); |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 48 | * </pre> |
| 49 | * </li> |
| 50 | * |
| 51 | * </ol> |
| 52 | * |
| 53 | * @author margaretha |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 54 | */ |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 55 | public class SpanRelationQuery extends SimpleSpanQuery { |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 56 | |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 57 | private RelationDirection direction; |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 58 | private byte tempSourceNum = 1; |
| 59 | private byte tempTargetNum = 2; |
| 60 | private byte sourceClass; |
| 61 | private byte targetClass; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 62 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 63 | private List<Byte> tempClassNumbers = Arrays.asList(tempSourceNum, |
| 64 | tempTargetNum); |
| 65 | |
| 66 | |
| Eliza Margaretha | 2b0a564 | 2015-03-04 10:33:51 +0000 | [diff] [blame] | 67 | /** |
| 68 | * Constructs a SpanRelationQuery based on the given span query. |
| 69 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 70 | * @param firstClause |
| 71 | * a SpanQuery. |
| Eliza Margaretha | 2b0a564 | 2015-03-04 10:33:51 +0000 | [diff] [blame] | 72 | * @param collectPayloads |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 73 | * a boolean flag representing the value |
| 74 | * <code>true</code> if |
| 75 | * payloads are to be collected, otherwise |
| 76 | * <code>false</code>. |
| Eliza Margaretha | 2b0a564 | 2015-03-04 10:33:51 +0000 | [diff] [blame] | 77 | */ |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 78 | public SpanRelationQuery (SpanQuery firstClause, boolean collectPayloads, |
| 79 | RelationDirection direction) { |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 80 | super(firstClause, collectPayloads); |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 81 | this.direction = direction; |
| 82 | // SpanTermQuery st = (SpanTermQuery) firstClause; |
| 83 | // String direction = st.getTerm().text().substring(0, 1); |
| 84 | // if (direction.equals("<")) { |
| 85 | // this.direction = 1; |
| 86 | // } |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 87 | } |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 88 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 89 | |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 90 | // public SpanRelationQuery (SpanQuery firstClause, List<Byte> classNumbers, |
| 91 | // boolean collectPayloads) { |
| 92 | // this(firstClause, collectPayloads); |
| 93 | // this.tempClassNumbers = classNumbers; |
| 94 | // this.tempSourceNum = classNumbers.get(0); |
| 95 | // this.tempTargetNum = classNumbers.get(1); |
| 96 | // } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 97 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 98 | |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 99 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 100 | public SimpleSpanQuery clone () { |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 101 | SimpleSpanQuery sq = new SpanRelationQuery( |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 102 | (SpanQuery) this.firstClause.clone(), this.collectPayloads, |
| 103 | this.direction); |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 104 | return sq; |
| 105 | } |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 106 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 107 | |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 108 | @Override |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 109 | public Spans getSpans (LeafReaderContext context, Bits acceptDocs, |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 110 | Map<Term, TermContext> termContexts) throws IOException { |
| 111 | return new RelationSpans(this, context, acceptDocs, termContexts); |
| 112 | } |
| 113 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 114 | |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 115 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 116 | public String toString (String field) { |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 117 | StringBuilder sb = new StringBuilder(); |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 118 | if (sourceClass > 0) { |
| 119 | sb.append("{"); |
| 120 | sb.append(sourceClass); |
| 121 | sb.append(": source:"); |
| 122 | } |
| 123 | if (targetClass > 0) { |
| 124 | sb.append("{"); |
| 125 | sb.append(targetClass); |
| 126 | sb.append(": target:"); |
| 127 | } |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 128 | sb.append("spanRelation("); |
| 129 | sb.append(firstClause.toString(field)); |
| 130 | sb.append(")"); |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 131 | if (sourceClass > 0) { |
| 132 | sb.append("}"); |
| 133 | } |
| 134 | if (targetClass > 0) { |
| 135 | sb.append("}"); |
| 136 | } |
| Eliza Margaretha | 19cecc6 | 2014-12-19 17:10:06 +0000 | [diff] [blame] | 137 | sb.append(ToStringUtils.boost(getBoost())); |
| 138 | return sb.toString(); |
| 139 | } |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 140 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 141 | |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 142 | public RelationDirection getDirection () { |
| margaretha | ca8d622 | 2015-04-15 13:46:41 +0200 | [diff] [blame] | 143 | return direction; |
| 144 | } |
| 145 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 146 | |
| margaretha | f09fdd4 | 2017-12-19 17:47:50 +0100 | [diff] [blame] | 147 | public void setDirection (RelationDirection direction) { |
| margaretha | ca8d622 | 2015-04-15 13:46:41 +0200 | [diff] [blame] | 148 | this.direction = direction; |
| 149 | } |
| 150 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 151 | |
| 152 | public List<Byte> getTempClassNumbers () { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 153 | return tempClassNumbers; |
| 154 | } |
| 155 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 156 | |
| 157 | public void setTempClassNumbers (List<Byte> classNumbers) { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 158 | this.tempClassNumbers = classNumbers; |
| 159 | } |
| 160 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 161 | |
| 162 | public byte getTempSourceNum () { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 163 | return tempSourceNum; |
| 164 | } |
| 165 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 166 | |
| 167 | public void setTempSourceNum (byte sourceNum) { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 168 | this.tempSourceNum = sourceNum; |
| 169 | } |
| 170 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 171 | |
| 172 | public byte getTempTargetNum () { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 173 | return tempTargetNum; |
| 174 | } |
| 175 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 176 | |
| 177 | public void setTempTargetNum (byte targetNum) { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 178 | this.tempTargetNum = targetNum; |
| 179 | } |
| 180 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 181 | |
| 182 | public byte getSourceClass () { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 183 | return sourceClass; |
| 184 | } |
| 185 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 186 | |
| 187 | public void setSourceClass (byte sourceClass) |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 188 | throws IllegalArgumentException { |
| 189 | if (sourceClass < 1) { |
| 190 | throw new IllegalArgumentException( |
| 191 | "Class number must be bigger than 0."); |
| 192 | } |
| 193 | |
| 194 | this.sourceClass = sourceClass; |
| 195 | } |
| 196 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 197 | |
| 198 | public byte getTargetClass () { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 199 | return targetClass; |
| 200 | } |
| 201 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 202 | |
| 203 | public void setTargetClass (byte targetClass) |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 204 | throws IllegalArgumentException { |
| 205 | if (targetClass < 1) { |
| 206 | throw new IllegalArgumentException( |
| 207 | "Class number must be bigger than 0."); |
| 208 | } |
| 209 | this.targetClass = targetClass; |
| 210 | } |
| Eliza Margaretha | f13b8ad | 2014-10-13 16:36:28 +0000 | [diff] [blame] | 211 | } |