| Michael Hanl | ed5658f | 2014-02-07 22:24:46 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| Michael Hanl | 034be0d | 2014-02-14 10:17:34 +0000 | [diff] [blame] | 3 | import java.util.LinkedHashMap; |
| Michael Hanl | ed5658f | 2014-02-07 22:24:46 +0000 | [diff] [blame] | 4 | import java.util.LinkedList; |
| 5 | import java.util.List; |
| 6 | import java.util.Map; |
| Michael Hanl | faae018 | 2015-06-26 16:18:06 +0200 | [diff] [blame] | 7 | import java.util.regex.Pattern; |
| Michael Hanl | ed5658f | 2014-02-07 22:24:46 +0000 | [diff] [blame] | 8 | |
/**
 * Collects meta entries (kept in insertion order) together with an
 * optional span context and exposes them as a single map through
 * {@link #raw()}.
 *
 * @author hanl
 * @date 07/02/2014
 */
public class MetaQueryBuilder {

    /**
     * Detects a sized context specification: optional whitespace,
     * digits, a hyphen and one of c/char/chars or t/token/tokens
     * (e.g. "2-token", "10-chars"). It is applied with
     * {@code find()}, so a match anywhere in the input counts.
     */
    private static final Pattern SIZED_CONTEXT = Pattern
            .compile("\\s*\\d+-(?:c(?:hars?)?|t(?:okens?)?)");

    private final Map<String, Object> meta;
    private SpanContext spanContext;


    /**
     * Creates a builder with no entries and no span context.
     */
    public MetaQueryBuilder () {
        this.meta = new LinkedHashMap<>();
    }


    /**
     * Sets a sized span context whose size and type may differ
     * between the left and the right side.
     *
     * @param left
     *            size of the left context
     * @param leftType
     *            type of the left context
     * @param right
     *            size of the right context
     * @param rightType
     *            type of the right context
     * @return this builder
     * @throws NullPointerException
     *             if {@code left} or {@code right} is null (both are
     *             unboxed to {@code int})
     */
    public MetaQueryBuilder setSpanContext (Integer left, String leftType,
            Integer right, String rightType) {
        this.spanContext = new SpanContext(left, leftType, right, rightType);
        return this;
    }


    /**
     * @return the span context set so far, or null if none was set
     */
    public SpanContext getSpanContext () {
        return this.spanContext;
    }


    /**
     * Sets the span context from its string form.
     * <ul>
     * <li>{@code null} leaves the builder unchanged.</li>
     * <li>A string without any sized specification is stored
     * verbatim as a named context.</li>
     * <li>Otherwise all whitespace is removed and the string is read
     * as {@code <size>-<type>,<size>-<type>} (left, right). When
     * only one side is given, it is used for both sides.</li>
     * </ul>
     *
     * @param context
     *            the context specification, may be null
     * @return this builder
     * @throws IllegalArgumentException
     *             if a side of a sized specification has no
     *             {@code <size>-<type>} form; a non-numeric size
     *             surfaces as {@link NumberFormatException}
     */
    public MetaQueryBuilder setSpanContext (String context) {
        if (context == null) {
            return this;
        }
        if (!SIZED_CONTEXT.matcher(context).find()) {
            this.spanContext = new SpanContext(context);
            return this;
        }

        String[] sides = context.replaceAll("\\s+", "").split(",");
        String[] left = splitSide(sides[0], context);
        // A lone "2-token" describes both sides; previously this case
        // failed with an ArrayIndexOutOfBoundsException.
        String[] right = sides.length > 1 ? splitSide(sides[1], context)
                : left;
        this.spanContext = new SpanContext(Integer.valueOf(left[0]), left[1],
                Integer.valueOf(right[0]), right[1]);
        return this;
    }


    /**
     * Splits one side of a sized context into size and type.
     */
    private static String[] splitSide (String side, String context) {
        String[] parts = side.split("-");
        if (parts.length < 2) {
            throw new IllegalArgumentException("Invalid context '" + context
                    + "': expected <size>-<type> but found '" + side + "'");
        }
        return parts;
    }


    /**
     * Adds a meta entry; a null value is ignored.
     *
     * @param name
     *            key of the entry
     * @param value
     *            value of the entry, may be null
     * @return this builder
     */
    public MetaQueryBuilder addEntry (String name, Object value) {
        if (value != null) {
            meta.put(name, value);
        }
        return this;
    }


    /**
     * Merges the span context (if any) into the collected entries
     * and returns them.
     *
     * @return the builder's own entry map, not a copy
     */
    public Map raw () {
        if (this.spanContext != null) {
            meta.putAll(this.spanContext.raw());
        }
        return meta;
    }

    /**
     * Span context that is either a plain string or a pair of sized
     * sides (left and right).
     */
    public class SpanContext {
        private String leftType;
        private String rightType;
        private int leftSize;
        private int rightSize;
        // non-null only when built from a plain context string
        private String context = null;


        /**
         * Creates a sized context whose size and type may differ
         * between the left and the right side.
         *
         * @param ls
         *            left size
         * @param lt
         *            left type
         * @param rs
         *            right size
         * @param rt
         *            right type
         */
        public SpanContext (int ls, String lt, int rs, String rt) {
            this.leftType = lt;
            this.leftSize = ls;
            this.rightType = rt;
            this.rightSize = rs;
        }


        /**
         * Creates a context that is passed on as the given string.
         *
         * @param context
         *            the context string
         */
        public SpanContext (String context) {
            this.context = context;
        }

        public String getRightType () {
            return this.rightType;
        }

        public String getLeftType () {
            return this.leftType;
        }

        public Integer getLeftSize () {
            return this.leftSize;
        }

        public Integer getRightSize () {
            return this.rightSize;
        }

        /**
         * Builds the map form of this context under the key
         * "context": either the plain string, or a map with the keys
         * "left" and "right", each holding a [type, size] list.
         *
         * @return a new map with the single key "context"
         */
        public Map raw () {
            Map<String, Object> result = new LinkedHashMap<>();
            if (this.context != null) {
                result.put("context", this.context);
                return result;
            }

            List<Object> left = new LinkedList<>();
            left.add(this.leftType);
            left.add(this.leftSize);

            List<Object> right = new LinkedList<>();
            right.add(this.rightType);
            right.add(this.rightSize);

            Map<String, Object> sides = new LinkedHashMap<>();
            sides.put("left", left);
            sides.put("right", right);
            result.put("context", sides);
            return result;
        }
    }
}