/*
 * Decompiled with CFR 0.152.
 */
package de.ids_mannheim.korap.tokenizer;

import de.ids_mannheim.korap.tokenizer.DerekoDfaTokenizer_de;
import de.ids_mannheim.korap.tokenizer.KorapTokenizer;
import de.ids_mannheim.korap.tokenizer.Languages;
import de.ids_mannheim.korap.tokenizer.Utils;
import io.github.classgraph.AnnotationInfo;
import io.github.classgraph.AnnotationParameterValue;
import io.github.classgraph.ClassGraph;
import io.github.classgraph.ClassInfoList;
import io.github.classgraph.ScanResult;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.jar.Attributes;
import java.util.jar.Manifest;
import java.util.stream.Collectors;
import picocli.CommandLine;

/**
 * Command line entry point of the KorAP tokenizer.
 *
 * <p>Options and positional parameters are bound by picocli to the annotated
 * fields and to {@link #setLanguage(String)}. {@link #call()} then runs the
 * selected {@link KorapTokenizer} implementation over every input file (or
 * over standard input when no file is given) and writes the result to the
 * chosen output.
 */
@CommandLine.Command(mixinStandardHelpOptions=true, name="koraptokenizer", versionProvider=Main.ManifestVersionProvider.class, description={"Tokenizes (and sentence splits) text input."})
public class Main implements Callable<Integer> {
    /** Language code used when {@code --language} is not given. */
    public final String DEFAULT_LANGUAGE = "de";
    /** Fully qualified name of the tokenizer class used when none is selected. */
    public final String DEFAULT_TOKENIZER_CLASS_NAME = DerekoDfaTokenizer_de.class.getName();

    /** Picocli command spec; used to build {@link CommandLine.ParameterException}s. */
    @CommandLine.Spec
    CommandLine.Model.CommandSpec spec;

    /** Class name of the tokenizer handed to the {@link KorapTokenizer.Builder}. */
    @CommandLine.Option(names={"-T", "--tokenizer-class"}, completionCandidates=AvailableKorapTokenizerList.class, description={"Class name of the actual tokenizer that will be used (candidates: ${COMPLETION-CANDIDATES} default: ${DEFAULT-VALUE})"})
    String tokenizerClassName = this.DEFAULT_TOKENIZER_CLASS_NAME;

    /** Currently selected language code; only changed through {@link #setLanguage(String)}. */
    String language = this.DEFAULT_LANGUAGE;

    @CommandLine.Option(names={"--no-tokens"}, negatable=true, description={"Print tokens (default: ${DEFAULT-VALUE})"})
    boolean tokens = true;

    @CommandLine.Option(names={"-p", "--positions"}, description={"Print token start and end positions as character offsets (default: ${DEFAULT-VALUE})"})
    boolean positions = false;

    @CommandLine.Option(names={"-s", "--sentence-boundaries"}, description={"Print sentence boundary positions (default: ${DEFAULT-VALUE})"})
    boolean sentencize = false;

    /** Hidden, deprecated switch; not read anywhere in this class. */
    @CommandLine.Option(names={"-ktt"}, hidden=true, description={"Deprecated. For internal use only. (default: ${DEFAULT-VALUE})"})
    boolean ktt = false;

    @CommandLine.Option(names={"-n", "--normalize"}, description={"Normalize tokens (default: ${DEFAULT-VALUE})"})
    boolean normalize = false;

    /** Output file name; {@code "-"} (or {@code null}) selects {@link System#out}. */
    @CommandLine.Option(names={"-o", "--output-file"}, paramLabel="FILE", description={"Output file (default: ${DEFAULT-VALUE})"})
    String output_filename = "-";

    /** Charset used to decode every input, including standard input. */
    @CommandLine.Option(names={"-e", "--encoding"}, description={"Input encoding (default: ${DEFAULT-VALUE})"})
    Charset encoding = StandardCharsets.UTF_8;

    /** Passed on to {@code Utils.createFile} when the output file is created. */
    @CommandLine.Option(names={"--force"}, description={"Force overwrite (default: ${DEFAULT-VALUE})"})
    boolean force_overwrite = false;

    /** Input file names; when empty, standard input is tokenized instead. */
    @CommandLine.Parameters(arity="0..*", paramLabel="FILES", description={"input files"})
    private final ArrayList<String> inputFiles = new ArrayList<>();

    /**
     * Looks up a tokenizer implementation that declares support for a language.
     *
     * <p>Scans the class path for classes implementing {@link KorapTokenizer}
     * and inspects the values of their {@link Languages} annotation.
     *
     * @param languageTwoLetterCode language code to look for
     * @return the class name of the first matching implementation found, or
     *         {@code null} if no implementation lists the requested language
     */
    public static String getTokenizerForLanguage(String languageTwoLetterCode) {
        try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*").scan()) {
            ClassInfoList korapTokenizerClasses = scanResult.getClassesImplementing("de.ids_mannheim.korap.tokenizer.KorapTokenizer");
            for (String className : korapTokenizerClasses.getNames()) {
                AnnotationInfo languages = scanResult.getClassInfo(className).getAnnotationInfo(Languages.class.getName());
                if (languages == null) {
                    continue;
                }
                for (AnnotationParameterValue parameter : languages.getParameterValues()) {
                    for (String lang : (String[]) parameter.getValue()) {
                        if (lang.equals(languageTwoLetterCode)) {
                            return className;
                        }
                    }
                }
            }
        }
        return null;
    }

    /**
     * Selects the tokenizer implementation registered for a language.
     *
     * <p>The current tokenizer and language are only replaced when a matching
     * implementation exists; a rejected value leaves both untouched.
     *
     * @param requestedLanguage language code given on the command line
     * @throws CommandLine.ParameterException if no tokenizer supports the
     *         requested language; the message names the rejected value and the
     *         available languages
     */
    @CommandLine.Option(names={"-l", "--language"}, completionCandidates=AvailableLanguagesList.class, description={"ISO-639-1 two letter language code (valid candidates: ${COMPLETION-CANDIDATES}; default: de)"})
    public void setLanguage(String requestedLanguage) {
        String matchingTokenizer = Main.getTokenizerForLanguage(requestedLanguage);
        if (matchingTokenizer == null) {
            // Report the value the user actually passed, not the previously configured language.
            throw new CommandLine.ParameterException(this.spec.commandLine(), String.format("Invalid value '%s' for option '--language': (use one of: %s).", requestedLanguage, AvailableLanguagesList.listKorAPTokenizerLanguages()));
        }
        this.tokenizerClassName = matchingTokenizer;
        this.language = requestedLanguage;
    }

    /**
     * Parses the arguments with picocli and runs the command.
     *
     * @param args raw command line arguments
     */
    public static void main(String[] args) {
        new CommandLine(new Main()).execute(args);
    }

    /**
     * Tokenizes all inputs and writes the result to the selected output.
     *
     * <p>Failures on a single input are reported on {@link System#err} and do
     * not stop processing of the remaining inputs.
     *
     * @return always {@code 0}
     * @throws FileNotFoundException if the output file cannot be opened for writing
     */
    @Override
    public Integer call() throws FileNotFoundException {
        boolean writeToStdout = this.output_filename == null || this.output_filename.equals("-");
        PrintStream output_stream;
        if (writeToStdout) {
            output_stream = System.out;
        } else {
            File f = Utils.createFile(this.output_filename, this.force_overwrite);
            output_stream = new PrintStream(new BufferedOutputStream(new FileOutputStream(f)));
        }
        // Without explicit input files, read standard input exactly once.
        List<String> inputs = this.inputFiles.isEmpty() ? Arrays.asList("-") : this.inputFiles;
        try {
            for (String fn : inputs) {
                try {
                    this.tokenizeInput(fn, output_stream);
                }
                // Files.newBufferedReader reports a missing file as NoSuchFileException,
                // so both exception types map to the same message.
                catch (FileNotFoundException | java.nio.file.NoSuchFileException e) {
                    System.err.println("File not found : \"" + fn + "\"");
                }
                catch (IOException e) {
                    System.err.println("IO error scanning file \"" + fn + "\"");
                    System.err.println(e);
                }
                catch (Exception e) {
                    System.err.println("Unexpected exception:");
                    e.printStackTrace();
                }
            }
        } finally {
            // Only close streams opened here; System.out stays open.
            if (!writeToStdout) {
                output_stream.close();
            }
        }
        return 0;
    }

    /**
     * Opens one input and tokenizes it; {@code "-"} selects standard input.
     * Readers opened on files are closed again, standard input is left open.
     */
    private void tokenizeInput(String fn, PrintStream output_stream) throws Exception {
        if ("-".equals(fn)) {
            // Deliberately not closed: closing the reader would also close System.in.
            this.runTokenizer(new BufferedReader(new InputStreamReader(System.in, this.encoding)), output_stream);
            return;
        }
        try (BufferedReader reader = Files.newBufferedReader(new File(fn).toPath(), this.encoding)) {
            this.runTokenizer(reader, output_stream);
        }
    }

    /** Builds the tokenizer from the current option values and scans the given reader. */
    private void runTokenizer(BufferedReader reader, PrintStream output_stream) throws Exception {
        new KorapTokenizer.Builder().tokenizerClassName(this.tokenizerClassName).inputReader(reader).outputStream(output_stream).printTokens(this.tokens).printOffsets(this.positions).normalize(this.normalize).splitSentences(this.sentencize).setEcho(true).build().scan();
    }

    /**
     * Completion candidates for {@code --language}: every language declared by
     * a {@link Languages} annotation on a {@link KorapTokenizer} implementation.
     */
    static class AvailableLanguagesList extends ArrayList<String> {
        AvailableLanguagesList() {
            super(AvailableLanguagesList.listKorAPTokenizerLanguages());
        }

        /**
         * Collects the languages of all tokenizer implementations on the class path.
         *
         * @return the language codes, sorted and without duplicates
         */
        static List<String> listKorAPTokenizerLanguages() {
            List<String> languages = new ArrayList<>();
            try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*").scan()) {
                ClassInfoList korapTokenizerClasses = scanResult.getClassesImplementing("de.ids_mannheim.korap.tokenizer.KorapTokenizer");
                for (String className : korapTokenizerClasses.getNames()) {
                    AnnotationInfo annotation = scanResult.getClassInfo(className).getAnnotationInfo(Languages.class.getName());
                    if (annotation == null) {
                        continue;
                    }
                    for (AnnotationParameterValue parameter : annotation.getParameterValues()) {
                        languages.addAll(Arrays.asList((String[]) parameter.getValue()));
                    }
                }
            }
            return languages.stream().sorted().distinct().collect(Collectors.toList());
        }
    }

    /**
     * Version provider that reads {@code Implementation-Version} from the jar
     * manifest whose {@code Implementation-Title} is {@code KorAP-Tokenizer}.
     */
    static class ManifestVersionProvider implements CommandLine.IVersionProvider {
        ManifestVersionProvider() {
        }

        /**
         * Searches every {@code META-INF/MANIFEST.MF} visible to picocli's class loader.
         *
         * @return a one-element array holding the version from the first applicable
         *         manifest, a one-element array with an error text if a manifest
         *         cannot be read, or an empty array if no manifest applies
         */
        @Override
        public String[] getVersion() throws Exception {
            Enumeration<URL> resources = CommandLine.class.getClassLoader().getResources("META-INF/MANIFEST.MF");
            while (resources.hasMoreElements()) {
                URL url = resources.nextElement();
                // The manifest stream is closed again after each candidate.
                try (java.io.InputStream manifestStream = url.openStream()) {
                    Manifest manifest = new Manifest(manifestStream);
                    if (!this.isApplicableManifest(manifest)) {
                        continue;
                    }
                    Attributes attr = manifest.getMainAttributes();
                    return new String[]{(String) ManifestVersionProvider.get(attr, "Implementation-Version")};
                }
                catch (IOException ex) {
                    return new String[]{"Unable to read from " + url + ": " + ex};
                }
            }
            return new String[0];
        }

        /** Tells whether the manifest belongs to the KorAP tokenizer artifact. */
        private boolean isApplicableManifest(Manifest manifest) {
            Attributes attributes = manifest.getMainAttributes();
            return "KorAP-Tokenizer".equals(ManifestVersionProvider.get(attributes, "Implementation-Title"));
        }

        /** Reads a main attribute by name; {@code null} when the attribute is absent. */
        private static Object get(Attributes attributes, String key) {
            return attributes.get(new Attributes.Name(key));
        }
    }

    /**
     * Completion candidates for {@code --tokenizer-class}: the names of all
     * classes implementing {@link KorapTokenizer} found on the class path.
     */
    static class AvailableKorapTokenizerList extends ArrayList<String> {
        AvailableKorapTokenizerList() {
            super(AvailableKorapTokenizerList.listKorAPTokenizerImplementations());
        }

        /**
         * Scans the class path for tokenizer implementations.
         *
         * @return the class names of all {@link KorapTokenizer} implementations
         */
        static List<String> listKorAPTokenizerImplementations() {
            try (ScanResult scanResult = new ClassGraph().enableAllInfo().acceptPackages("*").scan()) {
                return scanResult.getClassesImplementing("de.ids_mannheim.korap.tokenizer.KorapTokenizer").getNames();
            }
        }
    }
}

