| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 3 | import de.ids_mannheim.korap.index.TermInfo; |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 4 | import de.ids_mannheim.korap.response.Match; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 5 | import de.ids_mannheim.korap.index.PositionsToOffset; |
| 6 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 7 | import org.slf4j.Logger; |
| 8 | import org.slf4j.LoggerFactory; |
| 9 | |
| 10 | import java.util.*; |
| 11 | |
| 12 | public class SpanInfo { |
| 13 | ArrayList<TermInfo> terms; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 14 | HashMap<Integer, Integer> startChar, endChar; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 15 | PositionsToOffset pto; |
| 16 | int localDocID; |
| 17 | |
| 18 | // Logger |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 19 | private final static Logger log = LoggerFactory.getLogger(Match.class); |
| 20 | // This advices the java compiler to ignore all loggings |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 21 | public static final boolean DEBUG = false; |
| 22 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 23 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 24 | public SpanInfo (PositionsToOffset pto, int localDocID) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 25 | this.terms = new ArrayList<TermInfo>(64); |
| 26 | this.startChar = new HashMap<Integer, Integer>(16); |
| 27 | this.endChar = new HashMap<Integer, Integer>(16); |
| 28 | this.pto = pto; |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 29 | this.localDocID = localDocID; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 30 | }; |
| 31 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 32 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 33 | public void add (TermInfo info) { |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 34 | info.analyze(); |
| 35 | if (info.getType() != "pos") { |
| 36 | this.terms.add(info); |
| 37 | } |
| 38 | else { |
| 39 | this.startChar.put(info.getStartPos(), info.getStartChar()); |
| 40 | this.endChar.put(info.getEndPos(), info.getEndChar()); |
| 41 | }; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 42 | }; |
| 43 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 44 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 45 | public ArrayList<TermInfo> getTerms () { |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 46 | // Sort terms (this will also analyze them!) |
| 47 | Collections.sort(this.terms); |
| 48 | boolean found; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 49 | |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 50 | // Add character offset information to terms that are |
| 51 | // missing this information |
| 52 | for (TermInfo t : this.terms) { |
| 53 | if (DEBUG) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 54 | log.trace("Check offsets for {} and {}", t.getStartPos(), |
| 55 | t.getEndPos()); |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 56 | found = true; |
| 57 | if (t.getStartChar() == -1) { |
| 58 | if (this.startChar.containsKey(t.getStartPos())) |
| 59 | t.setStartChar(this.startChar.get(t.getStartPos())); |
| 60 | else |
| 61 | found = false; |
| 62 | } |
| 63 | if (t.getEndChar() == -1) { |
| 64 | if (this.endChar.containsKey(t.getEndPos())) |
| 65 | t.setEndChar(this.endChar.get(t.getEndPos())); |
| 66 | else |
| 67 | found = false; |
| 68 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 69 | |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 70 | // Add this to found offsets |
| 71 | if (found && t.getStartPos() == t.getEndPos()) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 72 | this.pto.addOffset(this.localDocID, t.getStartPos(), |
| 73 | t.getStartChar(), t.getEndChar()); |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 74 | else { |
| 75 | if (DEBUG) |
| 76 | log.trace("{} can't be found!", t.getAnnotation()); |
| 77 | this.pto.add(this.localDocID, t.getStartPos()); |
| 78 | this.pto.add(this.localDocID, t.getStartPos()); |
| 79 | }; |
| 80 | }; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 81 | |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 82 | return this.terms; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 83 | }; |
| 84 | }; |