Add mostly auto-generated API documentation

Change-Id: Ib007e4fbe69d3197a29f478a809fec4e3a541f4e

commit: d1dc8488838de6686bde867d8924b77f7f3f3574 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Thu Sep 07 21:28:23 2023 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Thu Sep 07 21:28:23 2023 +0200
tree: 263cb44c0539a52fce109452c52e86df98e41813
parent: 4128bee84abea2a66491f9242f77236e078ef551 [diff]
diff --git a/pom.xml b/pom.xml
index 037fa02..256bef1 100644
--- a/pom.xml
+++ b/pom.xml

@@ -258,6 +258,21 @@
                     </execution>
                 </executions>
             </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+                <version>3.5.0</version>
+                <configuration>
+                    <doclint>all,-missing</doclint>
+                    <tags>
+                        <tag>
+                            <name>apiNote</name>
+                            <placement>a</placement>
+                            <head>API Note:</head>
+                        </tag>
+                    </tags>
+                </configuration>
+            </plugin>
         </plugins>
     </build>
 

diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java b/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java
index cb8f280..ba5a882 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java

@@ -6,38 +6,102 @@
 import java.io.PrintStream;
 import java.io.Reader;
 
+/**
+ * Common interface of KorAP tokenizers; combines OpenNLP's tokenizer and sentence detector interfaces.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
 public interface KorapTokenizer extends opennlp.tools.tokenize.Tokenizer, opennlp.tools.sentdetect.SentenceDetector {
+    /**
+     * Scan.
+     *
+     * @throws java.io.IOException if an I/O error occurs
+     */
     void scan() throws IOException;
 
     /**
      * Mainly targeted language(s)
+     *
      * @return list of ISO 639 alpha-2 or alpha-3 language codes
      * @apiNote will later be used to find appropriate implementations via reflection
      */
     CharSequence[] getTargetLanguages();
-    
+
+    /**
+     * Sets input reader.
+     *
+     * @param inputReader the input reader
+     */
     void setInputReader(Reader inputReader);
 
+    /**
+     * Switches sentence splitting on or off.
+     *
+     * @param splitSentences the split sentences flag
+     */
     void setSplitSentences(boolean splitSentences);
 
+    /**
+     * Switches input echoing on or off.
+     *
+     * @param echo the echo flag
+     */
     void setEcho(boolean echo);
 
+    /**
+     * Switches offset printing on or off.
+     *
+     * @param printOffsets the print offsets flag
+     */
     void setPrintOffsets(boolean printOffsets);
 
+    /**
+     * Switches token printing on or off.
+     *
+     * @param tokenize the tokenize flag
+     */
     void setPrintTokens(boolean tokenize);
 
+    /**
+     * Sets output stream.
+     *
+     * @param outputStream the output stream
+     */
     void setOutputStream(PrintStream outputStream);
 
+    /**
+     * Switches normalization on or off.
+     *
+     * @param normalize the normalize flag
+     */
     void setNormalize(boolean normalize);
 
+    /** {@inheritDoc} */
     String[] tokenize(String s);
 
+    /** {@inheritDoc} */
     Span[] tokenizePos(String s);
 
+    /**
+     * Splits the given text into sentences.
+     *
+     * @param s the text to split into sentences
+     * @return the detected sentences
+     */
     String[] sentDetect(String s);
 
+    /**
+     * Detects the positions of the sentences in the given text.
+     *
+     * @param s the text to split into sentences
+     * @return the spans of the detected sentences
+     */
     Span[] sentPosDetect(String s);
 
+    /**
+     * Builder that configures and instantiates a {@link KorapTokenizer} implementation.
+     */
     class Builder {
         private boolean splitSentences;
         private boolean echo;
@@ -48,46 +112,102 @@
         private Class tokenizerClass;
         private Reader inputReader;
 
+        /**
+         * Sets the tokenizer implementation class by its name.
+         *
+         * @param tokenizerClassName the fully qualified name of the tokenizer class
+         * @return the builder
+         * @throws ClassNotFoundException if no class with the given name can be found
+         */
         public Builder tokenizerClassName(String tokenizerClassName) throws ClassNotFoundException {
             this.tokenizerClass = Class.forName(tokenizerClassName);
             return this;
         }
 
+        /**
+         * Sets the sentence splitting flag.
+         *
+         * @param splitSentences the split sentences flag
+         * @return the builder
+         */
         public Builder splitSentences(boolean splitSentences) {
             this.splitSentences = splitSentences;
             return this;
         }
 
+        /**
+         * Sets the echo flag.
+         *
+         * @param echo the echo flag
+         * @return the builder
+         */
         public Builder setEcho(boolean echo) {
             this.echo = echo;
             return this;
         }
 
+        /**
+         * Sets the offset printing flag.
+         *
+         * @param printOffsets the print offsets flag
+         * @return the builder
+         */
         public Builder printOffsets(boolean printOffsets) {
             this.printOffsets = printOffsets;
             return this;
         }
 
+        /**
+         * Sets the token printing flag.
+         *
+         * @param printTokens the print tokens flag
+         * @return the builder
+         */
         public Builder printTokens(boolean printTokens) {
             this.printTokens = printTokens;
             return this;
         }
 
+        /**
+         * Sets the input reader.
+         *
+         * @param inputReader the input reader
+         * @return the builder
+         */
         public Builder inputReader(Reader inputReader) {
             this.inputReader = inputReader;
             return this;
         }
 
+        /**
+         * Sets the normalization flag.
+         *
+         * @param normalize the normalize flag
+         * @return the builder
+         */
         public Builder normalize(boolean normalize) {
             this.normalize = normalize;
             return this;
         }
 
+        /**
+         * Sets the output stream.
+         *
+         * @param outputStream the output stream
+         * @return the builder
+         */
         public Builder outputStream(PrintStream outputStream) {
             this.outputStream = outputStream;
             return this;
         }
 
+        /**
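+         * Creates an instance of the configured tokenizer class and applies the builder settings to it.
+         *
+         * @return the configured tokenizer
+         * @throws IllegalAccessException if the tokenizer class or its no-argument constructor is not accessible
+         * @throws InstantiationException if the tokenizer class cannot be instantiated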
+         */
         public KorapTokenizer build() throws IllegalAccessException, InstantiationException {
             KorapTokenizer korapTokenizer = (KorapTokenizer) tokenizerClass.newInstance();
             korapTokenizer.setEcho(echo);

diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java b/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java
index 623edc8..e5e67c4 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java

@@ -1,5 +1,16 @@
 package de.ids_mannheim.korap.tokenizer;
 
+/**
+ * The annotation type Languages.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
 public @interface Languages {
+    /**
+     * The values of this annotation.
+     *
+     * @return the values as a string array
+     */
     String[] value();
 }

diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java b/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java
index 475a843..7115928 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/Main.java

@@ -13,16 +13,37 @@
 import java.util.concurrent.Callable;
 import java.util.stream.Collectors;
 
+/**
+ * Command line entry point: tokenizes (and sentence splits) text input.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
 @CommandLine.Command(mixinStandardHelpOptions = true,
         name = "koraptokenizer", version = "2.2.3", description = "Tokenizes (and sentence splits) text input.")
 public class Main implements Callable<Integer> {
 
+    /**
+     * The Default language.
+     */
     public final String DEFAULT_LANGUAGE = "de";
+    /**
+     * The Default tokenizer class name.
+     */
     public final String DEFAULT_TOKENIZER_CLASS_NAME = DerekoDfaTokenizer_de.class.getName();
 
+    /**
+     * The Spec.
+     */
     @CommandLine.Spec
     CommandLine.Model.CommandSpec spec;
 
+    /**
+     * Gets the tokenizer for a language.
+     *
+     * @param languageTwoLetterCode the two-letter language code
+     * @return the tokenizer for the given language
+     */
     public static String getTokenizerForLanguage(String languageTwoLetterCode) {
         try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*")
                 .scan()) {
@@ -42,11 +63,22 @@
         return null;
     }
 
+    /**
+     * List of the available languages; used as completion candidates for the {@code --language} option.
+     */
     static class AvailableLanguagesList extends ArrayList<String> {
+        /**
+         * Instantiates a new Available languages list.
+         */
         AvailableLanguagesList() {
             super(listKorAPTokenizerLanguages());
         }
 
+        /**
+         * Lists the languages of the available KorAP tokenizers.
+         *
+         * @return the list of languages
+         */
         static List<String> listKorAPTokenizerLanguages() {
             ArrayList<String> languages = new ArrayList<>();
             try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*")
@@ -64,11 +96,22 @@
         }
     }
 
+    /**
+     * List of the available KorAP tokenizers; used as completion candidates for the {@code --tokenizer-class} option.
+     */
     static class AvailableKorapTokenizerList extends ArrayList<String> {
+        /**
+         * Instantiates a new Available korap tokenizer list.
+         */
         AvailableKorapTokenizerList() {
             super(listKorAPTokenizerImplementations());
         }
 
+        /**
+         * Lists the class names of the available KorAP tokenizer implementations.
+         *
+         * @return the list of class names
+         */
         static List<String> listKorAPTokenizerImplementations() {
             List<String> korapTokenizerClassNames;
             try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*")
@@ -80,13 +123,25 @@
         }
     }
 
+    /**
+     * The Tokenizer class name.
+     */
     @CommandLine.Option(names = {"-T", "--tokenizer-class"},
             completionCandidates= AvailableKorapTokenizerList.class,
             description = "Class name of the actual tokenizer that will be used (candidates: ${COMPLETION-CANDIDATES} default: ${DEFAULT-VALUE})")
     String tokenizerClassName = DEFAULT_TOKENIZER_CLASS_NAME;
 
 
+    /**
+     * The Language.
+     */
     String language = DEFAULT_LANGUAGE;
+
+    /**
+     * Sets language.
+     *
+     * @param requestedLanguage the requested language
+     */
     @CommandLine.Option(names = {"-l", "--language"},
             completionCandidates = AvailableLanguagesList.class,
             description = "ISO-639-1 two letter language code (valid candidates: ${COMPLETION-CANDIDATES}; default: " + DEFAULT_LANGUAGE + ")")
@@ -101,31 +156,55 @@
         language = requestedLanguage;
     }
 
+    /**
+     * Whether to print tokens.
+     */
     @CommandLine.Option(names = {"--no-tokens"}, negatable = true, description = "Print tokens (default: ${DEFAULT-VALUE})")
     boolean tokens = true;
 
+    /**
+     * Whether to print token start and end positions as character offsets.
+     */
     @CommandLine.Option(names = {"-p", "--positions"}, description = "Print token start and end positions as character offsets (default: ${DEFAULT-VALUE})")
     boolean positions = false;
 
+    /**
+     * Whether to print sentence boundary positions.
+     */
     @CommandLine.Option(names = {"-s", "--sentence-boundaries"}, description = "Print sentence boundary positions (default: ${DEFAULT-VALUE})")
     boolean sentencize = false;
 
+    /**
+     * Deprecated flag for internal use only.
+     */
     @CommandLine.Option(names = {"-ktt"}, hidden = true, description = "Deprecated. For internal use only. (default: ${DEFAULT-VALUE})")
     boolean ktt = false;
 
+    /**
+     * Whether to normalize tokens.
+     */
     @CommandLine.Option(names = {"-n", "--normalize"}, description = "Normalize tokens (default: ${DEFAULT-VALUE})")
     boolean normalize = false;
 
+    /**
+     * The output file name.
+     */
     @SuppressWarnings("CanBeFinal")
     @CommandLine.Option(names = {"-o",
             "--output-file"}, paramLabel = "FILE", description = "Output file (default: ${DEFAULT-VALUE})")
     String output_filename = "-";
 
+    /**
+     * The input encoding.
+     */
     @SuppressWarnings("CanBeFinal")
     @CommandLine.Option(names = {"-e",
             "--encoding"}, description = "Input encoding (default: ${DEFAULT-VALUE})")
     Charset encoding = StandardCharsets.UTF_8;
 
+    /**
+     * Whether to force overwriting.
+     */
     @SuppressWarnings("CanBeFinal")
     @CommandLine.Option(names = {"--force"}, description = "Force overwrite (default: ${DEFAULT-VALUE})")
     boolean force_overwrite = false;
@@ -134,14 +213,23 @@
     @CommandLine.Parameters(arity = "0..*", paramLabel = "FILES", description = "input files")
     private final ArrayList<String> inputFiles = new ArrayList<>();
 
+    /**
+     * Instantiates a new Main.
+     */
     public Main() {
 
     }
 
+    /**
+     * The entry point of application.
+     *
+     * @param args the input arguments
+     */
     public static void main(String[] args) {
         new CommandLine(new Main()).execute(args);
     }
 
+    /** {@inheritDoc} */
     @Override
     public Integer call() throws FileNotFoundException {
         final PrintStream output_stream;

diff --git a/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java b/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java
index 6df7f00..40c3026 100644
--- a/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java
+++ b/src/main/java/de/ids_mannheim/korap/tokenizer/Utils.java

@@ -6,7 +6,20 @@
 import java.nio.file.FileAlreadyExistsException;
 import java.nio.file.Files;
 
+/**
+ * The type Utils.
+ *
+ * @author kupietz
+ * @version $Id: $Id
+ */
 public class Utils {
+    /**
+     * Creates a file.
+     *
+     * @param fname           the file name
+     * @param force_overwrite the force overwrite flag
+     * @return the file
+     */
     public static File createFile(String fname, boolean force_overwrite) {
         File f = new File(fname);
         try {
commit	d1dc8488838de6686bde867d8924b77f7f3f3574	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Sep 07 21:28:23 2023 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Sep 07 21:28:23 2023 +0200
tree	263cb44c0539a52fce109452c52e86df98e41813
parent	4128bee84abea2a66491f9242f77236e078ef551 [diff]