Add mostly auto-generated api documentation
Change-Id: Ib007e4fbe69d3197a29f478a809fec4e3a541f4e
diff --git a/pom.xml b/pom.xml
index 037fa02..256bef1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -258,6 +258,21 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>3.5.0</version>
+ <configuration>
+ <doclint>all,-missing</doclint>
+ <tags>
+ <tag>
+ <name>apiNote</name>
+ <placement>a</placement>
+ <head>API Note:</head>
+ </tag>
+ </tags>
+ </configuration>
+ </plugin>
</plugins>
</build>
diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java b/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java
index cb8f280..ba5a882 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java
@@ -6,38 +6,102 @@
import java.io.PrintStream;
import java.io.Reader;
+/**
+ * Tokenizer interface that combines OpenNLP's {@code Tokenizer} and {@code SentenceDetector}.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
public interface KorapTokenizer extends opennlp.tools.tokenize.Tokenizer, opennlp.tools.sentdetect.SentenceDetector {
+ /**
+ * Scan.
+ *
+ * @throws java.io.IOException the io exception
+ */
void scan() throws IOException;
/**
* Mainly targeted language(s)
+ *
* @return list of ISO 639 alpha-2 or alpha-3 language codes
* @apiNote will later be used to find appropriate implementations via reflection
*/
CharSequence[] getTargetLanguages();
-
+
+ /**
+ * Sets input reader.
+ *
+ * @param inputReader the input reader
+ */
void setInputReader(Reader inputReader);
+ /**
+ * Switches sentence splitting on or off.
+ *
+ * @param splitSentences the split sentences flag
+ */
void setSplitSentences(boolean splitSentences);
+ /**
+ * Switches input echoing on or off.
+ *
+ * @param echo the echo flag
+ */
void setEcho(boolean echo);
+ /**
+ * Switches offset printing on or off.
+ *
+ * @param printOffsets the print offsets flag
+ */
void setPrintOffsets(boolean printOffsets);
+ /**
+ * Switches token printing on or off.
+ *
+ * @param tokenize the tokenize flag
+ */
void setPrintTokens(boolean tokenize);
+ /**
+ * Sets output stream.
+ *
+ * @param outputStream the output stream
+ */
void setOutputStream(PrintStream outputStream);
+ /**
+ * Switches normalization on or off.
+ *
+ * @param normalize the normalize flag
+ */
void setNormalize(boolean normalize);
+ /** {@inheritDoc} */
String[] tokenize(String s);
+ /** {@inheritDoc} */
Span[] tokenizePos(String s);
+ /**
+ * Detects sentences in the given string.
+ *
+ * @param s the string to split into sentences
+ * @return the detected sentences
+ */
String[] sentDetect(String s);
+ /**
+ * Detects sentence positions in the given string.
+ *
+ * @param s the string to split into sentences
+ * @return the spans of the detected sentences
+ */
Span[] sentPosDetect(String s);
+ /**
+ * Builder for configuring and creating {@link KorapTokenizer} instances.
+ */
class Builder {
private boolean splitSentences;
private boolean echo;
@@ -48,46 +112,102 @@
private Class tokenizerClass;
private Reader inputReader;
+ /**
+ * Sets the tokenizer implementation class by its class name.
+ *
+ * @param tokenizerClassName the tokenizer class name
+ * @return this builder
+ * @throws ClassNotFoundException if the named class cannot be located
+ */
public Builder tokenizerClassName(String tokenizerClassName) throws ClassNotFoundException {
this.tokenizerClass = Class.forName(tokenizerClassName);
return this;
}
+ /**
+ * Sets the sentence splitting flag.
+ *
+ * @param splitSentences the split sentences flag
+ * @return this builder
+ */
public Builder splitSentences(boolean splitSentences) {
this.splitSentences = splitSentences;
return this;
}
+ /**
+ * Sets the echo flag.
+ *
+ * @param echo the echo flag
+ * @return this builder
+ */
public Builder setEcho(boolean echo) {
this.echo = echo;
return this;
}
+ /**
+ * Sets the print offsets flag.
+ *
+ * @param printOffsets the print offsets flag
+ * @return this builder
+ */
public Builder printOffsets(boolean printOffsets) {
this.printOffsets = printOffsets;
return this;
}
+ /**
+ * Sets the print tokens flag.
+ *
+ * @param printTokens the print tokens flag
+ * @return this builder
+ */
public Builder printTokens(boolean printTokens) {
this.printTokens = printTokens;
return this;
}
+ /**
+ * Sets the input reader.
+ *
+ * @param inputReader the input reader
+ * @return this builder
+ */
public Builder inputReader(Reader inputReader) {
this.inputReader = inputReader;
return this;
}
+ /**
+ * Sets the normalize flag.
+ *
+ * @param normalize the normalize flag
+ * @return this builder
+ */
public Builder normalize(boolean normalize) {
this.normalize = normalize;
return this;
}
+ /**
+ * Sets the output stream.
+ *
+ * @param outputStream the output stream
+ * @return this builder
+ */
public Builder outputStream(PrintStream outputStream) {
this.outputStream = outputStream;
return this;
}
+ /**
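+ * Creates a new instance of the configured tokenizer class and configures it.
+ *
+ * @return the new tokenizer instance
+ * @throws IllegalAccessException if the tokenizer class or its nullary constructor is not accessible
+ * @throws InstantiationException if the tokenizer class cannot be instantiated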
+ */
public KorapTokenizer build() throws IllegalAccessException, InstantiationException {
KorapTokenizer korapTokenizer = (KorapTokenizer) tokenizerClass.newInstance();
korapTokenizer.setEcho(echo);
diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java b/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java
index 623edc8..e5e67c4 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java
@@ -1,5 +1,16 @@
package de.ids_mannheim.korap.tokenizer;
+/**
+ * Annotation type that holds a list of languages as strings.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
public @interface Languages {
+ /**
+ * The annotation value.
+ *
+ * @return the array of strings
+ */
String[] value();
}
diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java b/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java
index 475a843..7115928 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java
@@ -13,16 +13,37 @@
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
+/**
+ * Command line interface that tokenizes (and sentence splits) text input.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
@CommandLine.Command(mixinStandardHelpOptions = true,
name = "koraptokenizer", version = "2.2.3", description = "Tokenizes (and sentence splits) text input.")
public class Main implements Callable<Integer> {
+ /**
+ * The Default language.
+ */
public final String DEFAULT_LANGUAGE = "de";
+ /**
+ * The Default tokenizer class name.
+ */
public final String DEFAULT_TOKENIZER_CLASS_NAME = DerekoDfaTokenizer_de.class.getName();
+ /**
+ * The Spec.
+ */
@CommandLine.Spec
CommandLine.Model.CommandSpec spec;
+ /**
+ * Gets tokenizer for language.
+ *
+ * @param languageTwoLetterCode the two-letter language code
+ * @return the tokenizer for language
+ */
public static String getTokenizerForLanguage(String languageTwoLetterCode) {
try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*")
.scan()) {
@@ -42,11 +63,22 @@
return null;
}
+ /**
+ * List of available languages, used as completion candidates for the {@code --language} option.
+ */
static class AvailableLanguagesList extends ArrayList<String> {
+ /**
+ * Instantiates a new Available languages list.
+ */
AvailableLanguagesList() {
super(listKorAPTokenizerLanguages());
}
+ /**
+ * Lists the KorAP tokenizer languages.
+ *
+ * @return the list of languages
+ */
static List<String> listKorAPTokenizerLanguages() {
ArrayList<String> languages = new ArrayList<>();
try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*")
@@ -64,11 +96,22 @@
}
}
+ /**
+ * List of available tokenizer class names, used as completion candidates for the {@code --tokenizer-class} option.
+ */
static class AvailableKorapTokenizerList extends ArrayList<String> {
+ /**
+ * Instantiates a new Available korap tokenizer list.
+ */
AvailableKorapTokenizerList() {
super(listKorAPTokenizerImplementations());
}
+ /**
+ * Lists the KorAP tokenizer implementations.
+ *
+ * @return the list of tokenizer class names
+ */
static List<String> listKorAPTokenizerImplementations() {
List<String> korapTokenizerClassNames;
try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*")
@@ -80,13 +123,25 @@
}
}
+ /**
+ * The Tokenizer class name.
+ */
@CommandLine.Option(names = {"-T", "--tokenizer-class"},
completionCandidates= AvailableKorapTokenizerList.class,
description = "Class name of the actual tokenizer that will be used (candidates: ${COMPLETION-CANDIDATES} default: ${DEFAULT-VALUE})")
String tokenizerClassName = DEFAULT_TOKENIZER_CLASS_NAME;
+ /**
+ * The Language.
+ */
String language = DEFAULT_LANGUAGE;
+
+ /**
+ * Sets language.
+ *
+ * @param requestedLanguage the requested language
+ */
@CommandLine.Option(names = {"-l", "--language"},
completionCandidates = AvailableLanguagesList.class,
description = "ISO-639-1 two letter language code (valid candidates: ${COMPLETION-CANDIDATES}; default: " + DEFAULT_LANGUAGE + ")")
@@ -101,31 +156,55 @@
language = requestedLanguage;
}
+ /**
+ * Whether to print tokens.
+ */
@CommandLine.Option(names = {"--no-tokens"}, negatable = true, description = "Print tokens (default: ${DEFAULT-VALUE})")
boolean tokens = true;
+ /**
+ * Whether to print token start and end positions as character offsets.
+ */
@CommandLine.Option(names = {"-p", "--positions"}, description = "Print token start and end positions as character offsets (default: ${DEFAULT-VALUE})")
boolean positions = false;
+ /**
+ * Whether to print sentence boundary positions.
+ */
@CommandLine.Option(names = {"-s", "--sentence-boundaries"}, description = "Print sentence boundary positions (default: ${DEFAULT-VALUE})")
boolean sentencize = false;
+ /**
+ * Deprecated, hidden option for internal use only.
+ */
@CommandLine.Option(names = {"-ktt"}, hidden = true, description = "Deprecated. For internal use only. (default: ${DEFAULT-VALUE})")
boolean ktt = false;
+ /**
+ * Whether to normalize tokens.
+ */
@CommandLine.Option(names = {"-n", "--normalize"}, description = "Normalize tokens (default: ${DEFAULT-VALUE})")
boolean normalize = false;
+ /**
+ * The Output filename.
+ */
@SuppressWarnings("CanBeFinal")
@CommandLine.Option(names = {"-o",
"--output-file"}, paramLabel = "FILE", description = "Output file (default: ${DEFAULT-VALUE})")
String output_filename = "-";
+ /**
+ * The input encoding.
+ */
@SuppressWarnings("CanBeFinal")
@CommandLine.Option(names = {"-e",
"--encoding"}, description = "Input encoding (default: ${DEFAULT-VALUE})")
Charset encoding = StandardCharsets.UTF_8;
+ /**
+ * Whether to force overwriting.
+ */
@SuppressWarnings("CanBeFinal")
@CommandLine.Option(names = {"--force"}, description = "Force overwrite (default: ${DEFAULT-VALUE})")
boolean force_overwrite = false;
@@ -134,14 +213,23 @@
@CommandLine.Parameters(arity = "0..*", paramLabel = "FILES", description = "input files")
private final ArrayList<String> inputFiles = new ArrayList<>();
+ /**
+ * Instantiates a new Main.
+ */
public Main() {
}
+ /**
+ * The entry point of application.
+ *
+ * @param args the input arguments
+ */
public static void main(String[] args) {
new CommandLine(new Main()).execute(args);
}
+ /** {@inheritDoc} */
@Override
public Integer call() throws FileNotFoundException {
final PrintStream output_stream;
diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java b/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java
index 6df7f00..40c3026 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java
@@ -6,7 +6,20 @@
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
+/**
+ * The type Utils.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
public class Utils {
+ /**
+ * Creates a file.
+ *
+ * @param fname the file name
+ * @param force_overwrite the force overwrite flag
+ * @return the file
+ */
public static File createFile(String fname, boolean force_overwrite) {
File f = new File(fname);
try {