blob: 5c797ce7f2493d8c8971f274dc8641e92bdda9de [file] [log] [blame]
Nils Diewaldcde69082014-01-16 15:46:48 +00001package de.ids_mannheim.korap.index;
Nils Diewaldbb33da22015-03-04 16:24:25 +00002
Nils Diewaldcde69082014-01-16 15:46:48 +00003import de.ids_mannheim.korap.index.TermInfo;
Nils Diewald392bcf32015-02-26 20:01:17 +00004import de.ids_mannheim.korap.response.Match;
Nils Diewaldcde69082014-01-16 15:46:48 +00005import de.ids_mannheim.korap.index.PositionsToOffset;
6
Nils Diewaldcde69082014-01-16 15:46:48 +00007import org.slf4j.Logger;
8import org.slf4j.LoggerFactory;
9
10import java.util.*;
11
12public class SpanInfo {
13 ArrayList<TermInfo> terms;
Nils Diewaldbb33da22015-03-04 16:24:25 +000014 HashMap<Integer, Integer> startChar, endChar;
Nils Diewaldcde69082014-01-16 15:46:48 +000015 PositionsToOffset pto;
16 int localDocID;
17
18 // Logger
Nils Diewald392bcf32015-02-26 20:01:17 +000019 private final static Logger log = LoggerFactory.getLogger(Match.class);
20 // This advices the java compiler to ignore all loggings
Nils Diewald82a4b862014-02-20 21:17:41 +000021 public static final boolean DEBUG = false;
22
Nils Diewaldbb33da22015-03-04 16:24:25 +000023
Nils Diewaldcde69082014-01-16 15:46:48 +000024 public SpanInfo (PositionsToOffset pto, int localDocID) {
Nils Diewaldbb33da22015-03-04 16:24:25 +000025 this.terms = new ArrayList<TermInfo>(64);
26 this.startChar = new HashMap<Integer, Integer>(16);
27 this.endChar = new HashMap<Integer, Integer>(16);
28 this.pto = pto;
Nils Diewald392bcf32015-02-26 20:01:17 +000029 this.localDocID = localDocID;
Nils Diewaldcde69082014-01-16 15:46:48 +000030 };
31
Nils Diewaldbb33da22015-03-04 16:24:25 +000032
Nils Diewaldcde69082014-01-16 15:46:48 +000033 public void add (TermInfo info) {
Nils Diewald392bcf32015-02-26 20:01:17 +000034 info.analyze();
35 if (info.getType() != "pos") {
36 this.terms.add(info);
37 }
38 else {
39 this.startChar.put(info.getStartPos(), info.getStartChar());
40 this.endChar.put(info.getEndPos(), info.getEndChar());
41 };
Nils Diewaldcde69082014-01-16 15:46:48 +000042 };
43
Nils Diewaldbb33da22015-03-04 16:24:25 +000044
Nils Diewaldcde69082014-01-16 15:46:48 +000045 public ArrayList<TermInfo> getTerms () {
Nils Diewald392bcf32015-02-26 20:01:17 +000046 // Sort terms (this will also analyze them!)
47 Collections.sort(this.terms);
48 boolean found;
Nils Diewaldcde69082014-01-16 15:46:48 +000049
Nils Diewald392bcf32015-02-26 20:01:17 +000050 // Add character offset information to terms that are
51 // missing this information
52 for (TermInfo t : this.terms) {
53 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +000054 log.trace("Check offsets for {} and {}", t.getStartPos(),
55 t.getEndPos());
Nils Diewald392bcf32015-02-26 20:01:17 +000056 found = true;
57 if (t.getStartChar() == -1) {
58 if (this.startChar.containsKey(t.getStartPos()))
59 t.setStartChar(this.startChar.get(t.getStartPos()));
60 else
61 found = false;
62 }
63 if (t.getEndChar() == -1) {
64 if (this.endChar.containsKey(t.getEndPos()))
65 t.setEndChar(this.endChar.get(t.getEndPos()));
66 else
67 found = false;
68 };
Nils Diewaldbb33da22015-03-04 16:24:25 +000069
Nils Diewald392bcf32015-02-26 20:01:17 +000070 // Add this to found offsets
71 if (found && t.getStartPos() == t.getEndPos())
Nils Diewaldbb33da22015-03-04 16:24:25 +000072 this.pto.addOffset(this.localDocID, t.getStartPos(),
73 t.getStartChar(), t.getEndChar());
Nils Diewald392bcf32015-02-26 20:01:17 +000074 else {
75 if (DEBUG)
76 log.trace("{} can't be found!", t.getAnnotation());
77 this.pto.add(this.localDocID, t.getStartPos());
78 this.pto.add(this.localDocID, t.getStartPos());
79 };
80 };
Nils Diewaldcde69082014-01-16 15:46:48 +000081
Nils Diewald392bcf32015-02-26 20:01:17 +000082 return this.terms;
Nils Diewaldcde69082014-01-16 15:46:48 +000083 };
84};