// blob: 68ba827aec3ab45529f8776e0dc94d99d77aa6a3 [file] [log] [blame]
package de.ids_mannheim.korap.index;
import de.ids_mannheim.korap.util.CorpusDataException;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
* A MultiTermToken represents a set of {@link MultiTerm MultiTerms}
* starting at the same position, i.e. represents a segment
* in a {@link MultiTermTokenStream}.
*
* <blockquote><pre>
* MultiTermToken mtt = new MultiTermToken("t:test", "a:abbruch");
* mtt.add("b:banane");
* System.err.println(mtt.toString());
* // [t:test|a:abbruch|b:banane]
* </pre></blockquote>
*
* @author diewald
*/
public class MultiTermToken {

    /**
     * The {@link MultiTerm MultiTerms} of this token.
     * The field is public; note that modifying the list directly
     * (instead of using the <code>add</code> methods) does not reset
     * the internal "sorted" flag.
     */
    public List<MultiTerm> terms;

    // True once the terms were sorted and no term was added since.
    private boolean sorted = false;

    // This advises the java compiler to ignore all loggings
    public static final boolean DEBUG = false;

    // One shared logger instead of one factory lookup per token.
    // NOTE(review): the logger category is MultiTermTokenStream, not
    // MultiTermToken - kept as is to not change log routing; confirm
    // whether this is intended.
    private static final Logger log = LoggerFactory
            .getLogger(MultiTermTokenStream.class);


    /**
     * Construct a new MultiTermToken by passing a stream of
     * {@link MultiTerm MultiTerms}.
     *
     * @param terms
     *        The first {@link MultiTerm} object of the token.
     * @param moreTerms
     *        Further {@link MultiTerm} objects at the same position.
     */
    public MultiTermToken (MultiTerm terms, MultiTerm ... moreTerms) {
        this.terms = new ArrayList<MultiTerm>(16);
        this.terms.add(terms);

        // Further elements on same position
        Collections.addAll(this.terms, moreTerms);
    }


    /**
     * Construct a new MultiTermToken by passing a {@link MultiTerm}
     * represented as a prefix and a surface string.
     *
     * If the {@link MultiTerm} can't be created, the
     * {@link CorpusDataException} is logged and the token
     * stays empty.
     *
     * @param prefix
     *        The term prefix.
     * @param surface
     *        The term surface.
     * @see MultiTerm
     */
    public MultiTermToken (char prefix, String surface) {
        this.terms = new ArrayList<MultiTerm>(16);

        try {
            // First word element
            this.terms.add(new MultiTerm(prefix, surface));
        }
        catch (CorpusDataException cde) {
            log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
        }
    }


    /**
     * Construct a new MultiTermToken by passing a stream of
     * {@link MultiTerm MultiTerms} represented as strings.
     *
     * A failure on the first term is propagated to the caller.
     * A failure on one of the further terms is only logged; the
     * failing term and all terms following it are not added.
     *
     * @param terms
     *        The first {@link MultiTerm} string of the token.
     * @param moreTerms
     *        Further {@link MultiTerm} strings at the same position.
     * @throws CorpusDataException
     *         if the first term can't be turned into a
     *         {@link MultiTerm}.
     */
    public MultiTermToken (String terms, String ... moreTerms)
            throws CorpusDataException {
        this.terms = new ArrayList<MultiTerm>(16);

        // First word element - not guarded, so errors propagate
        this.terms.add(new MultiTerm(terms));

        try {
            // Further elements on same position
            for (String further : moreTerms) {
                this.terms.add(new MultiTerm(further));
            }
        }
        catch (CorpusDataException cde) {
            log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
        }
    }


    /**
     * Add a new {@link MultiTerm} to the MultiTermToken.
     *
     * @param term
     *        A {@link MultiTerm} object.
     * @return The {@link MultiTermToken} object for chaining.
     */
    public MultiTermToken add (MultiTerm term) {
        this.terms.add(term);

        // The new term may be out of order
        this.sorted = false;
        return this;
    }


    /**
     * Add a new {@link MultiTerm} to the MultiTermToken.
     * Empty strings are ignored. If the {@link MultiTerm} can't be
     * created, the {@link CorpusDataException} is logged and
     * nothing is added.
     *
     * @param term
     *        A MultiTerm represented as a surface string.
     * @return The {@link MultiTermToken} object for chaining.
     * @throws CorpusDataException
     *         Declared for interface compatibility; creation
     *         failures are caught and logged inside this method.
     */
    public MultiTermToken add (String term) throws CorpusDataException {
        if (term.length() == 0) {
            return this;
        }

        try {
            this.add(new MultiTerm(term));
        }
        catch (CorpusDataException cde) {
            log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
        }
        return this;
    }


    /**
     * Add a new {@link MultiTerm} to the MultiTermToken.
     * Empty strings are ignored. If the {@link MultiTerm} can't be
     * created, the {@link CorpusDataException} is logged and
     * nothing is added.
     *
     * @param prefix
     *        A MultiTerm prefix.
     * @param term
     *        A MultiTerm represented as a surface string.
     * @return The {@link MultiTermToken} object for chaining.
     */
    public MultiTermToken add (char prefix, String term) {
        if (term.length() == 0) {
            return this;
        }

        try {
            this.add(new MultiTerm(prefix, term));
        }
        catch (CorpusDataException cde) {
            log.error("{}: {}", cde.getErrorCode(), cde.getMessage());
        }
        return this;
    }


    /**
     * Get a {@link MultiTerm} by index.
     * The terms are sorted before the lookup, so the index refers
     * to the position in the sorted list.
     *
     * @param index
     *        The index position of a {@link MultiTerm} in the
     *        {@link MultiTermToken}.
     * @return A {@link MultiTerm}.
     */
    public MultiTerm get (int index) {
        return this.sort().terms.get(index);
    }


    /**
     * Get the number of {@link MultiTerm MultiTerms} in the
     * MultiTermToken.
     *
     * @return The number of {@link MultiTerm MultiTerms} in the
     *         MultiTermToken.
     */
    public int getSize () {
        return this.terms.size();
    }


    /**
     * Sort the {@link MultiTerm MultiTerms} by their natural
     * ordering. Nothing is done if the terms are already
     * flagged as sorted.
     *
     * @return The {@link MultiTermToken} object for chaining.
     */
    public MultiTermToken sort () {
        if (!this.sorted) {
            Collections.sort(this.terms);
            this.sorted = true;
        }
        return this;
    }


    /**
     * Serialize the MultiTermToken to a string.
     * The terms are sorted first and then joined by <code>|</code>
     * in square brackets, e.g. <code>[a|b|c]</code>. A token
     * without any terms is serialized as <code>[]</code>.
     *
     * @return A string representation of the MultiTermToken,
     *         built from the string representations of its
     *         {@link MultiTerm MultiTerms}.
     */
    @Override
    public String toString () {
        this.sort();

        StringBuilder sb = new StringBuilder();
        sb.append('[');

        // Separator goes in front of every term but the first, so an
        // empty token (possible if term creation failed in the
        // constructor) doesn't raise an IndexOutOfBoundsException.
        for (int pos = 0; pos < this.terms.size(); pos++) {
            if (pos > 0) {
                sb.append('|');
            }
            sb.append(this.terms.get(pos).toString());
        }

        sb.append(']');
        return sb.toString();
    }
}