/*
 * Decompiled with CFR 0.152.
 */
package marmot.morph;

import java.io.File;
import java.io.Serializable;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import lemming.lemma.BackupLemmatizer;
import lemming.lemma.GoldLemmaGenerator;
import lemming.lemma.LemmaCandidate;
import lemming.lemma.LemmaCandidateGenerator;
import lemming.lemma.LemmaCandidateSet;
import lemming.lemma.LemmaInstance;
import lemming.lemma.LemmatizerGeneratorTrainer;
import lemming.lemma.SimpleLemmatizer;
import lemming.lemma.SimpleLemmatizerTrainer;
import lemming.lemma.ranker.Ranker;
import lemming.lemma.ranker.RankerCandidate;
import lemming.lemma.ranker.RankerInstance;
import lemming.lemma.ranker.RankerModel;
import lemming.lemma.ranker.RankerTrainer;
import lemming.lemma.toutanova.EditTreeAligner;
import lemming.lemma.toutanova.EditTreeAlignerTrainer;
import marmot.core.Model;
import marmot.core.Options;
import marmot.core.Sequence;
import marmot.core.State;
import marmot.core.Tagger;
import marmot.core.Token;
import marmot.core.Trainer;
import marmot.core.TrainerFactory;
import marmot.morph.MorphEvaluator;
import marmot.morph.MorphOptions;
import marmot.morph.MorphResult;
import marmot.morph.MorphTagger;
import marmot.morph.MorphWeightVector;
import marmot.morph.Word;
import marmot.morph.analyzer.Analyzer;
import marmot.morph.signature.Trie;
import marmot.util.Copy;
import marmot.util.Counter;
import marmot.util.FeatUtil;
import marmot.util.FileUtils;
import marmot.util.StringUtils;
import marmot.util.SymbolTable;

public class MorphModel
extends Model {
    private static final long serialVersionUID = 2L;
    // Tag levels: POS tags live at level 0, morphological tags at level 1 (see addTagIndexes).
    private static final int POS_INDEX_ = 0;
    private static final int MORPH_INDEX_ = 1;
    // Category names registered by extractCategories.
    private static final String POS_NAME_ = "pos";
    private static final String MORPH_NAME_ = "morph";
    // Symbol tables mapping normalized forms, shapes, characters and token features to integer indexes.
    private SymbolTable<String> word_table_;
    private SymbolTable<String> shape_table_;
    private SymbolTable<Character> char_table_;
    private SymbolTable<String> token_feature_table_;
    private SymbolTable<String> weighted_token_feature_table_;
    // One entry per tag level; an entry stays null when sub-tag splitting is disabled for that level.
    private List<SymbolTable<String>> subtag_tables_;
    // Form -> signature cache; transient, re-created lazily by addSignature.
    private transient Map<String, Integer> signature_cache;
    // Occurrence count per word index, as produced by extractVocabulary.
    private int[] vocab_;
    // Per level: every tag index except the boundary index (see extractTagClasses).
    private int[][] tag_classes_;
    // Per POS index: sorted morph indexes seen with it; null when extractPossibleTransitions returns null.
    private int[][] transitions_;
    // [level][tag index] -> sub-tag indexes of that tag (see extractSubTags).
    private int[][][] tag_to_subtag_;
    // Per level: bi-indexes (see getBiIndex) of the (word, tag) pairs treated as observed.
    private List<Set<Integer>> observed_sets_;
    // Per non-rare word index: level-0 tag indexes seen with it; only filled when
    // restrict_pos_tags_to_seen_combinations_ is set.
    private int[][] word_to_observed_tags_;
    // Shape trie; its classify() result is used as the shape of rare words (see addShape).
    private Trie trie_;
    // Configuration copied from MorphOptions in init.
    private boolean verbose_;
    private boolean shape_;
    private boolean tag_morph_;
    private int num_folds_;
    private int rare_word_max_freq_;
    private boolean split_morphs_;
    private boolean split_pos_;
    private StringUtils.Mode normalize_forms_;
    // Optional internal analyzer; its readings are added as token features (see addTokenFeatures).
    private Analyzer analyzer_;
    // Lemmatizer state, set up by initLemmatizer.
    private RankerModel lemma_model_;
    private List<LemmaCandidateGenerator> generators_;
    // Word form -> ranker instances; transient, re-created lazily by addRankerInstances.
    private transient Map<String, List<RankerInstance>> lemma_instance_map_;
    // Characters already reported as unknown; transient, re-created lazily by addCharIndexes.
    private transient Set<Character> unseen_char_set_;
    private boolean special_signature_;
    private boolean skip_lemma_;
    private boolean marginalize_lemmas_;
    private boolean lemma_use_morph_;
    private boolean lemma_tag_dependent_;
    private boolean restrict_pos_tags_to_seen_combinations_;
    boolean lemma_prepruning_extraction_ = true;

    /**
     * Initializes the model from the given options and training sentences.
     *
     * <p>Steps, in order: copy the configuration, initialize the base model with
     * the tag categories, create the symbol tables, load or induce the shape trie,
     * index every training word, derive vocabulary/transition/observation
     * statistics, assign word shapes and, if requested, set up the lemmatizer.
     *
     * @param options   configuration to copy into this model
     * @param sentences training sentences; every token is expected to be a {@link Word}
     */
    public void init(MorphOptions options, Collection<Sequence> sentences) {
        // Configuration first: extractCategories below reads tag_morph_.
        this.verbose_ = options.getVerbose();
        this.rare_word_max_freq_ = options.getRareWordMaxFreq();
        this.shape_ = options.getShape();
        this.tag_morph_ = options.getTagMorph();
        this.split_pos_ = options.getSplitPos();
        this.split_morphs_ = options.getSplitMorphs();
        this.normalize_forms_ = options.getNormalizeForms();
        this.special_signature_ = options.getSpecialSignature();
        this.num_folds_ = options.getNumFolds();
        this.restrict_pos_tags_to_seen_combinations_ = options.getRestrictPosTagsToSeenCombinations();
        this.init((Options)options, this.extractCategories(sentences));

        // One (possibly null) sub-tag table per tag level.
        this.subtag_tables_ = new ArrayList<SymbolTable<String>>();
        this.subtag_tables_.add(this.split_pos_ ? new SymbolTable<String>() : null);
        this.subtag_tables_.add(this.tag_morph_ && this.split_morphs_ ? new SymbolTable<String>() : null);

        this.word_table_ = new SymbolTable<String>(true);
        this.char_table_ = new SymbolTable<Character>();
        if (this.shape_) {
            this.shape_table_ = new SymbolTable<String>();
        }
        this.signature_cache = new HashMap<String, Integer>();
        this.token_feature_table_ = new SymbolTable<String>();
        this.weighted_token_feature_table_ = new SymbolTable<String>();

        String analyzer_name = options.getInternalAnalyzer();
        if (analyzer_name != null) {
            this.analyzer_ = Analyzer.create(analyzer_name);
        }

        if (this.shape_) {
            String trie_path = options.getShapeTriePath();
            File trie_file = trie_path.isEmpty() ? null : new File(trie_path);
            if (trie_file != null && trie_file.exists()) {
                System.err.format("Loading shape trie from: %s.\n", trie_path);
                this.trie_ = (Trie)FileUtils.loadFromFile(trie_path);
            } else {
                if (this.verbose_) {
                    System.err.println("Inducing shape trie.");
                }
                this.trie_ = Trie.train(sentences, options.getVeryVerbose());
                if (trie_file != null) {
                    // A path was configured but no file exists yet: persist the induced trie.
                    if (this.verbose_) {
                        System.err.format("Writing shape trie to: %s.\n", trie_path);
                    }
                    FileUtils.saveToFile((Serializable)this.trie_, trie_path);
                }
            }
        }
        if (this.trie_ == null) {
            // Without a trie there is nothing to compute shapes from.
            this.shape_ = false;
        }

        // First pass: fill the symbol tables. vocab_ is still null here, so
        // addShape (called from addIndexes) returns without assigning a shape.
        for (Sequence sentence : sentences) {
            for (Token token : sentence) {
                this.addIndexes((Word)token, true);
            }
        }

        this.vocab_ = this.extractVocabulary(options, sentences);
        this.transitions_ = this.extractPossibleTransitions(options, sentences);
        this.observed_sets_ = this.extractObservedSets(sentences);
        this.tag_classes_ = this.extractTagClasses(this.getTagTables());
        this.tag_to_subtag_ = this.extractSubTags(options.getSubTagSeparator());

        // Second pass: word frequencies are known now, so shapes can be assigned.
        for (Sequence sentence : sentences) {
            for (Token token : sentence) {
                Word current = (Word)token;
                this.addShape(current, current.getWordForm(), true);
            }
        }

        if (options.getLemmatizer()) {
            this.initLemmatizer(options, sentences);
        }
    }

    /**
     * Sets up the lemmatizer: copies the lemma options, builds the candidate
     * generators, creates ranker instances for every training word, trains the
     * edit tree aligner and initializes the lemma ranker model.
     *
     * @param options   configuration providing the lemma-related settings
     * @param sentences training sentences; every token is expected to be a {@link Word}
     */
    private void initLemmatizer(MorphOptions options, Collection<Sequence> sentences) {
        this.lemma_use_morph_ = options.getLemmaUseMorph();
        this.marginalize_lemmas_ = options.getMarginalizeLemmas();
        this.lemma_prepruning_extraction_ = options.getLemmaPrePruningExtraction();
        this.lemma_tag_dependent_ = options.getLemmaTagDependent();

        RankerTrainer.RankerTrainerOptions ranker_options = new RankerTrainer.RankerTrainerOptions();
        ranker_options.setOption("unigram-file", options.getLemmaUnigramFile());
        ranker_options.setOption("ignore-features", options.getLemmaIgnoreFeatures());
        ranker_options.setOption("aspell-path", options.getLemmaAspellPath());
        ranker_options.setOption("aspell-lang", options.getLemmaAspellLang());
        ranker_options.setOption("use-shape-lexicon", options.getLemmaUseShapeLexicon());
        ranker_options.setOption("cluster-file", options.getLemmaClusterFile());
        ranker_options.setOption("tag-dependent", this.lemma_tag_dependent_);
        ranker_options.setOption("offline-feature-extraction", false);
        ranker_options.setOption("use-hash-feature-table", options.getUseHashFeatureTable());

        List<LemmaInstance> lemma_instances = LemmaInstance.getInstances(sentences, true, false);

        // Candidate generators: gold lemmas, a separately trained ranker backed
        // by a simple lemmatizer, or whatever the ranker options provide.
        if (options.getGoldLemma()) {
            this.generators_ = Collections.singletonList(new GoldLemmaGenerator());
        } else if (options.getLemmaUseLemmingGenerator() > 0) {
            RankerTrainer.RankerTrainerOptions generator_options = new RankerTrainer.RankerTrainerOptions(ranker_options);
            generator_options.setOption("use-mallet", false);
            generator_options.setOption("use-perceptron", false);
            generator_options.setOption("use-morph", false);
            generator_options.setOption("use-shape-lexicon", true);
            generator_options.setOption("use-core-features", true);
            generator_options.setOption("use-alignment-features", true);
            generator_options.setOption("offline-feature-extraction", false);
            generator_options.setOption("tag-dependent", true);
            generator_options.setOption("use-hash-feature-table", true);

            RankerTrainer ranker_trainer = new RankerTrainer();
            ranker_trainer.setOptions(generator_options);
            LemmatizerGeneratorTrainer generator_trainer = ranker_trainer;
            Ranker ranker = (Ranker)generator_trainer.train((List)lemma_instances, (List)null);
            ranker.setNumCandidates(options.getLemmaUseLemmingGenerator());

            generator_trainer = new SimpleLemmatizerTrainer();
            generator_trainer.getOptions().setOption("use-backup", false);
            SimpleLemmatizer simple_lemmatizer = (SimpleLemmatizer)generator_trainer.train((List)lemma_instances, (List)null);

            this.generators_ = Collections.singletonList(new BackupLemmatizer(simple_lemmatizer, ranker));
        } else {
            this.generators_ = ranker_options.getGenerators(lemma_instances);
        }

        SymbolTable<String> pos_table = this.getTagTables().get(0);

        // Populates lemma_instance_map_ (one entry per distinct word form).
        for (Sequence sentence : sentences) {
            for (Token token : sentence) {
                this.addRankerInstances((Word)token);
            }
        }

        SymbolTable<String> morph_table = this.subtag_tables_.size() > 1 ? this.subtag_tables_.get(1) : null;

        EditTreeAlignerTrainer aligner_trainer = new EditTreeAlignerTrainer(ranker_options.getRandom(), false, 1, -1);
        EditTreeAligner aligner = (EditTreeAligner)aligner_trainer.train(lemma_instances);

        // Flatten the per-form instance lists, dropping the null slots left for skipped POS tags.
        LinkedList<RankerInstance> ranker_instances = new LinkedList<RankerInstance>();
        for (List<RankerInstance> per_form : this.lemma_instance_map_.values()) {
            for (RankerInstance candidate : per_form) {
                if (candidate != null) {
                    ranker_instances.add(candidate);
                }
            }
        }

        this.lemma_model_ = new RankerModel();
        this.lemma_model_.init(ranker_options, ranker_instances, aligner, pos_table, morph_table);
        this.skip_lemma_ = options.getLemmaPretraining();
    }

    /**
     * Combines a word index and a tag index into a single integer key.
     *
     * @param word  word index
     * @param level tag level the tag index belongs to
     * @param tag   tag index at that level; asserted to be smaller than the
     *              product of the tag table sizes of levels {@code 0..level}
     * @return {@code word * product + tag}, where {@code product} is that product of sizes
     */
    private int getBiIndex(int word, int level, int tag) {
        List<SymbolTable<String>> tag_tables = this.getTagTables();
        int num_tags = 1;
        int current_level = 0;
        while (current_level <= level) {
            num_tags *= tag_tables.get(current_level).size();
            ++current_level;
        }
        assert (tag < num_tags);
        return tag + word * num_tags;
    }

    /**
     * Tells whether a (word form, tag) pair is in the observed set of a level.
     * Rare word forms are all mapped to the shared index {@code word_table_.size()}.
     *
     * @param form_index word form index
     * @param level      tag level
     * @param tag_index  tag index at that level
     * @return {@code true} if the pair's bi-index is in the observed set of {@code level}
     */
    public boolean hasBeenObserved(int form_index, int level, int tag_index) {
        int lookup_form = this.isRare(form_index) ? this.word_table_.size() : form_index;
        Set<Integer> observed = this.observed_sets_.get(level);
        return observed.contains(this.getBiIndex(lookup_form, level, tag_index));
    }

    /**
     * Builds, for every level that has both a tag table and a sub-tag table, the
     * mapping from tag index to the sub-tag indexes of that tag.
     *
     * <p>Sub-tag indexes of a level are shifted by the combined size of the
     * sub-tag tables of the levels processed before it.
     *
     * @param subtag_separator separator passed on to {@code getSubTags}
     * @return array indexed by {@code [level][tag index]}; levels without
     *         sub-tags stay {@code null}
     */
    private int[][][] extractSubTags(String subtag_separator) {
        int num_subtag_levels = this.subtag_tables_.size();
        int[][][] tag_to_subtag = new int[num_subtag_levels][][];
        List<SymbolTable<String>> tag_tables = this.getTagTables();
        int offset = 0;
        for (int level = 0; level < num_subtag_levels && level < tag_tables.size(); ++level) {
            SymbolTable<String> tag_table = tag_tables.get(level);
            SymbolTable<String> subtag_table = this.subtag_tables_.get(level);
            if (tag_table != null && subtag_table != null) {
                int[][] subtags_of_tag = new int[tag_table.size()][];
                tag_to_subtag[level] = subtags_of_tag;
                for (Map.Entry<String, Integer> tag_entry : tag_table.entrySet()) {
                    int tag_index = tag_entry.getValue().intValue();
                    subtags_of_tag[tag_index] = this.getSubTags(tag_entry.getKey(), level, true, offset, subtag_separator);
                }
                // The sub-tag table was filled by getSubTags above; later levels start after it.
                offset += subtag_table.size();
            }
        }
        return tag_to_subtag;
    }

    /**
     * Lists, per level, every tag index of that level's table except the boundary index.
     *
     * @param tag_tables one tag table per level
     * @return array indexed by level, each holding {@code size - 1} tag indexes in ascending order
     */
    private int[][] extractTagClasses(List<SymbolTable<String>> tag_tables) {
        int num_levels = tag_tables.size();
        int[][] tag_classes = new int[num_levels][];
        for (int level = 0; level < num_levels; ++level) {
            int num_tags = tag_tables.get(level).size();
            int[] classes = new int[num_tags - 1];
            tag_classes[level] = classes;
            int next = 0;
            for (int tag_index = 0; tag_index < num_tags; ++tag_index) {
                if (tag_index != this.getBoundaryIndex()) {
                    classes[next++] = tag_index;
                }
            }
        }
        return tag_classes;
    }

    /*
     * WARNING - void declaration
     */
    private List<Set<Integer>> extractObservedSets(Collection<Sequence> sentences) {
        void var6_11;
        Object tags;
        List<SymbolTable<String>> tag_tables = this.getTagTables();
        ArrayList<Set<Integer>> observed_sets = new ArrayList<Set<Integer>>(tag_tables.size());
        ArrayList wordform_to_candidates = new ArrayList();
        for (int level = 0; level < tag_tables.size(); ++level) {
            wordform_to_candidates.add(new HashMap());
        }
        for (Sequence sequence : sentences) {
            for (Token xtoken : sequence) {
                Word token = (Word)xtoken;
                int n = token.getWordFormIndex();
                int tag_index = 0;
                for (int level = 0; level < tag_tables.size(); ++level) {
                    tag_index *= tag_tables.get(level).size();
                    tags = ((Map)wordform_to_candidates.get(level)).computeIfAbsent(n, k -> new HashSet());
                    tags.add(tag_index += token.getTagIndexes()[level]);
                }
            }
        }
        if (this.restrict_pos_tags_to_seen_combinations_) {
            this.word_to_observed_tags_ = new int[this.vocab_.length][];
            for (Map.Entry entry : ((Map)wordform_to_candidates.get(0)).entrySet()) {
                int word_index = (Integer)entry.getKey();
                if (this.isRare(word_index)) continue;
                Set tag_set = (Set)entry.getValue();
                int[] tags2 = new int[tag_set.size()];
                int n = 0;
                Iterator tag_index = tag_set.iterator();
                while (tag_index.hasNext()) {
                    int tag = (Integer)tag_index.next();
                    tags2[n++] = tag;
                }
                this.word_to_observed_tags_[word_index] = tags2;
            }
        }
        List<List<Integer>> open_tag_classes_per_level = MorphModel.getOpenPosTagClassesCrossValidation(sentences, this.num_folds_, tag_tables);
        boolean bl = false;
        while (var6_11 < tag_tables.size()) {
            HashSet<Integer> observed_set = new HashSet<Integer>();
            observed_sets.add(observed_set);
            List<Integer> open_tag_classes = open_tag_classes_per_level.get((int)var6_11);
            Iterator<Object> iterator2 = open_tag_classes.iterator();
            while (iterator2.hasNext()) {
                int n = iterator2.next();
                int biindex = this.getBiIndex(this.word_table_.size(), (int)var6_11, n);
                observed_set.add(biindex);
            }
            for (Map.Entry entry : ((Map)wordform_to_candidates.get((int)var6_11)).entrySet()) {
                int word_index = (Integer)entry.getKey();
                Set set = (Set)entry.getValue();
                if (this.isRare(word_index)) continue;
                tags = new int[set.size()];
                int index = 0;
                Iterator iterator3 = set.iterator();
                while (iterator3.hasNext()) {
                    int tag = (Integer)iterator3.next();
                    tags[index++] = tag;
                }
                for (Object tag : tags) {
                    int biindex = this.getBiIndex(word_index, (int)var6_11, (int)tag);
                    observed_set.add(biindex);
                }
            }
            ++var6_11;
        }
        return observed_sets;
    }

    /**
     * Estimates the open tag classes of every tag level by cross-validation.
     *
     * <p>The sentences are cut into consecutive folds of
     * {@code sentences.size() / num_folds} sentences (at least one); a fold
     * absorbs the remaining sentences when fewer than one further full fold
     * would be left after it. For each fold, the word forms that do not occur
     * outside the fold are treated as unknown, and the combined tag index of
     * each such token is counted per level. A tag is reported as open when its
     * share of the level's total count exceeds {@code 1.0E-4}.
     *
     * @param sentences  sentences whose tokens are {@link Word}s with word and tag indexes set
     * @param num_folds  number of folds; must be positive
     * @param tag_tables one tag table per level
     * @return per level, the combined tag indexes considered open
     * @throws InvalidParameterException if {@code num_folds} is not positive
     */
    public static List<List<Integer>> getOpenPosTagClassesCrossValidation(Collection<Sequence> sentences, int num_folds, List<SymbolTable<String>> tag_tables) {
        if (num_folds <= 0) {
            // Previously surfaced as a bare ArithmeticException from the division below.
            throw new InvalidParameterException("num_folds must be positive: " + num_folds);
        }
        final double min_relative_frequency = 1.0E-4;
        int num_levels = tag_tables.size();
        int num_sentences = sentences.size();
        int sentences_per_fold = Math.max(1, num_sentences / num_folds);

        List<Counter<Integer>> counters = new ArrayList<Counter<Integer>>(num_levels);
        for (int level = 0; level < num_levels; ++level) {
            counters.add(new Counter<Integer>());
        }

        Set<Integer> known = new HashSet<Integer>();
        int start_index = 0;
        while (start_index < num_sentences) {
            int end_index = start_index + sentences_per_fold;
            if (end_index + sentences_per_fold >= num_sentences) {
                end_index = num_sentences;
            }

            // Word forms seen outside the current fold [start_index, end_index).
            known.clear();
            int index = 0;
            for (Sequence sentence : sentences) {
                if (index < start_index || index >= end_index) {
                    for (Token token : sentence) {
                        known.add(((Word)token).getWordFormIndex());
                    }
                }
                ++index;
            }

            // Count the tags of the fold's tokens whose form is unknown outside the fold.
            index = 0;
            for (Sequence sentence : sentences) {
                if (index >= start_index && index < end_index) {
                    for (Token token : sentence) {
                        int form = ((Word)token).getWordFormIndex();
                        if (known.contains(form)) {
                            continue;
                        }
                        int tag_index = 0;
                        for (int level = 0; level < num_levels; ++level) {
                            tag_index = tag_index * tag_tables.get(level).size() + token.getTagIndexes()[level];
                            counters.get(level).increment(tag_index, 1.0);
                        }
                    }
                }
                ++index;
            }
            start_index = end_index;
        }

        List<List<Integer>> open_classes_per_level = new ArrayList<List<Integer>>(num_levels);
        for (int level = 0; level < num_levels; ++level) {
            Counter<Integer> counter = counters.get(level);
            double total_count = counter.totalCount();
            List<Integer> open_tag_classes = new LinkedList<Integer>();
            for (Map.Entry<Integer, Double> entry : counter.entrySet()) {
                if (entry.getValue() / total_count > min_relative_frequency) {
                    open_tag_classes.add(entry.getKey());
                }
            }
            open_classes_per_level.add(open_tag_classes);
        }
        return open_classes_per_level;
    }

    /**
     * Counts how often every word index occurs in the sentences.
     *
     * <p>The result has one slot per distinct word index and is addressed by
     * word index, so the indexes occurring in {@code sentences} must be exactly
     * {@code 0..n-1}.
     *
     * @param options   unused; kept for signature compatibility
     * @param sentences sentences whose tokens are {@link Word}s with the word index set
     * @return occurrence count per word index
     */
    private int[] extractVocabulary(MorphOptions options, Collection<Sequence> sentences) {
        Counter<Integer> vocab_counter = new Counter<Integer>();
        for (Sequence sentence : sentences) {
            for (Token token : sentence) {
                vocab_counter.increment(((Word)token).getWordFormIndex(), 1.0);
            }
        }
        int[] vocab_array = new int[vocab_counter.size()];
        // Typed entries: a raw Map.Entry would expose getValue() as Object.
        for (Map.Entry<Integer, Double> entry : vocab_counter.entrySet()) {
            vocab_array[entry.getKey()] = entry.getValue().intValue();
        }
        return vocab_array;
    }

    /**
     * Collects, for every level-0 tag index, the sorted level-1 tag indexes that
     * occur together with it in the sentences.
     *
     * @param options   consulted for {@code getRestricTransitions()}
     * @param sentences sentences whose tokens have both tag indexes set
     * @return {@code null} when transitions are not restricted or morph tagging
     *         is off; otherwise an array addressed by level-0 tag index, with
     *         slot 0 preset to a single-element array holding 0
     */
    private int[][] extractPossibleTransitions(MorphOptions options, Collection<Sequence> sentences) {
        boolean restrict = options.getRestricTransitions() && this.tag_morph_;
        if (!restrict) {
            return null;
        }

        Map<Integer, Set<Integer>> morphs_by_pos = new HashMap<Integer, Set<Integer>>();
        for (Sequence sentence : sentences) {
            for (Token token : sentence) {
                int pos_index = token.getTagIndexes()[0];
                int morph_index = token.getTagIndexes()[1];
                morphs_by_pos.computeIfAbsent(pos_index, k -> new HashSet<Integer>()).add(morph_index);
            }
        }

        // NOTE(review): sizing by the number of distinct level-0 tags assumes
        // those tag indexes are exactly 1..n — confirm against the tag table.
        int[][] transitions = new int[morphs_by_pos.size() + 1][];
        transitions[0] = new int[1];
        for (Map.Entry<Integer, Set<Integer>> entry : morphs_by_pos.entrySet()) {
            int pos_index = entry.getKey();
            Set<Integer> morph_set = entry.getValue();
            int[] morph_indexes = new int[morph_set.size()];
            int next = 0;
            for (int morph_index : morph_set) {
                morph_indexes[next++] = morph_index;
            }
            Arrays.sort(morph_indexes);
            assert (transitions[pos_index] == null);
            transitions[pos_index] = morph_indexes;
        }
        return transitions;
    }

    /**
     * Creates the table of tag categories: always "pos", plus "morph" when
     * morphological tagging is enabled.
     *
     * @param sentences unused; kept for signature compatibility
     * @return the category table
     */
    private SymbolTable<String> extractCategories(Collection<Sequence> sentences) {
        SymbolTable<String> categories = new SymbolTable<String>(true);
        categories.toIndex(POS_NAME_, true);
        if (!this.tag_morph_) {
            return categories;
        }
        categories.toIndex(MORPH_NAME_, true);
        return categories;
    }

    /**
     * Looks up the character indexes of a form and stores them on the word.
     * In verbose mode, each character with a negative index is reported on
     * stderr the first time it is encountered.
     *
     * @param word   word receiving the character indexes
     * @param form   form whose characters are looked up
     * @param insert passed on to {@code FeatUtil.getCharIndexes}
     */
    private void addCharIndexes(Word word, String form, boolean insert) {
        short[] char_indexes = FeatUtil.getCharIndexes(form, this.char_table_, insert);
        assert (char_indexes != null);
        int form_length = form.length();
        for (int position = 0; position < form_length; ++position) {
            char current = form.charAt(position);
            boolean unknown = char_indexes[position] < 0;
            if (unknown && this.verbose_) {
                if (this.unseen_char_set_ == null) {
                    this.unseen_char_set_ = new HashSet<Character>();
                }
                // add() returns false for characters that were already reported.
                if (this.unseen_char_set_.add(current)) {
                    System.err.format("Warning: Unknown character: %c\n", current);
                }
            }
        }
        word.setCharIndexes(char_indexes);
    }

    /**
     * Stores the signature of a form on the word, computing it through
     * {@code FeatUtil.getSignature} on a cache miss.
     *
     * @param word   word receiving the signature
     * @param form   form the signature is computed from
     * @param insert unused; kept for signature compatibility
     */
    private void addSignature(Word word, String form, boolean insert) {
        // The cache is transient, so it may be missing and is re-created here.
        if (this.signature_cache == null) {
            this.signature_cache = new HashMap<String, Integer>();
        }
        Integer cached = this.signature_cache.get(form);
        if (cached != null) {
            word.setWordSignature(cached);
            return;
        }
        Integer computed = FeatUtil.getSignature(form, this.special_signature_);
        this.signature_cache.put(form, computed);
        word.setWordSignature(computed);
    }

    /**
     * Converts token features into table indexes and stores them on {@code word}.
     *
     * <p>The plain feature indexes are the token features of {@code in_word}
     * followed by the analyzer readings of its form (if an analyzer is set);
     * they are only stored when there is at least one. Weighted token features
     * are indexed separately.
     *
     * @param word    word receiving the feature indexes
     * @param in_word word providing the token features and the form to analyze
     * @param insert  whether unseen features are added to the tables
     */
    private void addTokenFeatures(Word word, Word in_word, boolean insert) {
        String[] token_features = in_word.getTokenFeatures();
        List<String> readings = this.analyzer_ == null ? null : this.analyzer_.analyze(in_word.getWordForm());

        int num_token_features = token_features == null ? 0 : token_features.length;
        int num_readings = readings == null ? 0 : readings.size();
        int total = num_token_features + num_readings;
        if (total > 0) {
            int[] feature_indexes = new int[total];
            int next = 0;
            if (token_features != null) {
                for (String feature : token_features) {
                    feature_indexes[next++] = this.token_feature_table_.toIndex(feature, -1, insert);
                }
            }
            if (readings != null) {
                for (String reading : readings) {
                    feature_indexes[next++] = this.token_feature_table_.toIndex(reading, -1, insert);
                }
            }
            word.setTokenFeatureIndexes(feature_indexes);
        }

        // NOTE(review): weighted features are read from `word`, not `in_word`,
        // unlike the features above — confirm this is intended.
        String[] weighted_features = word.getWeightedTokenFeatures();
        if (weighted_features == null || this.weighted_token_feature_table_ == null) {
            return;
        }
        int[] weighted_indexes = new int[weighted_features.length];
        for (int position = 0; position < weighted_features.length; ++position) {
            weighted_indexes[position] = this.weighted_token_feature_table_.toIndex(weighted_features[position], -1, insert);
        }
        word.setWeightedTokenFeatureIndexes(weighted_indexes);
    }

    /**
     * Computes all model indexes of a word (tags, signature, token features,
     * word index, character indexes and shape) and stores them on the word.
     *
     * @param word   word to index
     * @param insert whether unseen symbols are added to the symbol tables
     */
    public void addIndexes(Word word, boolean insert) {
        String word_form = word.getWordForm();
        this.addTagIndexes(word, -1, insert);
        this.addSignature(word, word_form, insert);
        this.addTokenFeatures(word, word, insert);
        String normalized_form = StringUtils.normalize(word_form, this.normalize_forms_);
        int word_index = this.word_table_.toIndex(normalized_form, -1, insert);
        word.setWordIndex(word_index);
        this.addCharIndexes(word, normalized_form, insert);
        // addShape decides via word.getWordFormIndex() whether the word is rare,
        // so it must run only after the word index has been set above.
        this.addShape(word, word_form, insert);
    }

    /**
     * Returns the ranker instance of a word for a POS index, creating the
     * word's instances first if it has none yet.
     *
     * @param word      word whose instance is requested
     * @param pos_index POS index; ignored (slot 0 is used) when lemmas are not tag dependent
     * @param training  unused; kept for signature compatibility
     * @return the instance stored for that slot; asserted to be non-null
     */
    private RankerInstance getRankerInstance(Word word, int pos_index, boolean training) {
        List<RankerInstance> word_instances = word.getRankerIstances();
        if (word_instances == null) {
            word_instances = this.addRankerInstances(word);
        }
        int slot = this.lemma_tag_dependent_ ? pos_index : 0;
        RankerInstance selected = word_instances.get(slot);
        assert (selected != null);
        return selected;
    }

    /**
     * Attaches the ranker instances for a word's form to the word, creating and
     * caching them per word form on first use.
     *
     * <p>With tag-dependent lemmas the list has one slot per POS index; slots of
     * POS tags skipped by the seen-combination restriction stay {@code null}.
     * Otherwise the list holds a single instance created with the POS "_".
     *
     * @param word word to attach the instances to
     * @return the instances now stored on the word
     */
    private List<RankerInstance> addRankerInstances(Word word) {
        // The map is transient, so it may be missing and is re-created here.
        if (this.lemma_instance_map_ == null) {
            this.lemma_instance_map_ = new HashMap<String, List<RankerInstance>>();
        }
        List<RankerInstance> cached = this.lemma_instance_map_.get(word.getWordForm());
        if (cached == null) {
            SymbolTable<String> pos_table = this.getTagTables().get(0);
            if (!this.lemma_tag_dependent_) {
                cached = Collections.singletonList(this.getRankerInstance(word, "_", null));
            } else {
                int num_pos = pos_table.size();
                cached = new ArrayList<RankerInstance>(num_pos);
                for (int slot = 0; slot < num_pos; ++slot) {
                    cached.add(null);
                }
                // Candidate set shared by all POS-specific instances of this form.
                LemmaCandidateSet total_set = new LemmaCandidateSet();
                for (Map.Entry<String, Integer> pos_entry : pos_table.entrySet()) {
                    int current_pos_index = pos_entry.getValue();
                    String current_pos = pos_entry.getKey();
                    boolean unseen_combination = this.restrict_pos_tags_to_seen_combinations_
                        && !this.isRare(word.getWordFormIndex())
                        && !this.hasBeenObserved(word.getWordFormIndex(), 0, current_pos_index);
                    if (!unseen_combination) {
                        cached.set(current_pos_index, this.getRankerInstance(word, current_pos, total_set));
                    }
                }
            }
            this.lemma_instance_map_.put(word.getWordForm(), cached);
        }
        word.setRankerIstances(cached);
        return cached;
    }

    /**
     * Creates the ranker instance of a word for one POS tag.
     *
     * <p>When {@code total_set} is given, the instance's candidates are replaced
     * by the candidates that {@code total_set} returns for the same keys. An
     * instance that ends up without candidates gets the word form itself as
     * its candidate.
     *
     * @param word      word to create the instance for
     * @param pos       POS tag set on the lemma instance while candidates are generated
     * @param total_set candidate set shared across POS tags, or {@code null}
     * @return the new ranker instance
     */
    private RankerInstance getRankerInstance(Word word, String pos, LemmaCandidateSet total_set) {
        LemmaInstance lemma_instance = LemmaInstance.getInstance(word, false, false);
        // The POS tag is set only for the duration of candidate generation.
        lemma_instance.setPosTag(pos);
        RankerInstance ranker_instance = RankerInstance.getInstance(lemma_instance, this.generators_);
        lemma_instance.setPosTag(null);

        if (total_set == null) {
            if (ranker_instance.getCandidateSet().size() == 0) {
                ranker_instance.getCandidateSet().getCandidate(lemma_instance.getForm());
            }
            return ranker_instance;
        }

        LemmaCandidateSet shared_candidates = new LemmaCandidateSet();
        for (Map.Entry<String, LemmaCandidate> generated : ranker_instance.getCandidateSet()) {
            LemmaCandidate shared = total_set.getCandidate(generated.getKey());
            shared_candidates.addCandidate(generated.getKey(), shared);
        }
        ranker_instance.setCandidateSet(shared_candidates);
        if (ranker_instance.getCandidateSet().size() == 0) {
            ranker_instance.getCandidateSet().addCandidate(lemma_instance.getForm(), total_set.getCandidate(lemma_instance.getForm()));
        }
        return ranker_instance;
    }

    /**
     * Splits a tag into sub-tags and returns their indexes shifted by {@code offset}.
     *
     * @param morph            tag string to split
     * @param level            tag level selecting the sub-tag table
     * @param insert           whether unseen sub-tags are added to the table
     * @param offset           value added to every returned index
     * @param subtag_separator separator, passed to {@link String#split(String)} and
     *                         therefore treated as a regular expression
     * @return the shifted indexes of the non-empty sub-tags found in the table, or
     *         {@code null} for "&lt;STOP&gt;", "_", a level without sub-tag table,
     *         or a tag that splits into exactly one part
     */
    private int[] getSubTags(String morph, int level, boolean insert, int offset, String subtag_separator) {
        if (morph.equals("<STOP>") || morph.equals("_") || level >= this.subtag_tables_.size()) {
            return null;
        }
        SymbolTable<String> subtag_table = this.subtag_tables_.get(level);
        if (subtag_table == null) {
            return null;
        }
        String[] parts = morph.split(subtag_separator);
        if (parts.length == 1) {
            return null;
        }
        // Collect into an upper-bound buffer, then trim to the number actually kept.
        int[] buffer = new int[parts.length];
        int kept = 0;
        for (String part : parts) {
            if (part.isEmpty()) {
                continue;
            }
            int sub_index = subtag_table.toIndex(part, -1, insert);
            if (sub_index >= 0) {
                buffer[kept++] = sub_index + offset;
            }
        }
        return Arrays.copyOf(buffer, kept);
    }

    /**
     * Looks up the POS (level 0) and, when morph tagging is on, the morph
     * (level 1) tag index of a word and stores them on the word. A missing tag
     * string yields index -1.
     *
     * @param word   word whose tag strings are converted
     * @param head   unused; kept for signature compatibility
     * @param insert whether unseen tags are added to the tag tables
     */
    private void addTagIndexes(Word word, int head, boolean insert) {
        List<SymbolTable<String>> tag_tables = this.getTagTables();
        String pos_tag = word.getPosTag();
        String morph_tag = word.getMorphTag();
        int[] tag_indexes = new int[tag_tables.size()];

        int pos_index = -1;
        if (pos_tag != null) {
            pos_index = tag_tables.get(POS_INDEX_).toIndex(pos_tag, -1, insert);
        }
        tag_indexes[POS_INDEX_] = pos_index;

        if (this.tag_morph_) {
            int morph_index = -1;
            if (morph_tag != null) {
                morph_index = tag_tables.get(MORPH_INDEX_).toIndex(morph_tag, -1, insert);
            }
            tag_indexes[MORPH_INDEX_] = morph_index;
        }
        word.setTagIndexes(tag_indexes);
    }

    /**
     * Assigns a shape index to a rare word. Does nothing when shapes are
     * disabled, the vocabulary has not been extracted yet, or the word is not rare.
     *
     * @param word   word receiving the shape index; its word index must already be set
     * @param form   form classified by the shape trie
     * @param insert whether an unseen shape is added to the shape table
     */
    private void addShape(Word word, String form, boolean insert) {
        if (!this.shape_) {
            return;
        }
        int word_index = word.getWordFormIndex();
        if (this.vocab_ == null || !this.isRare(word_index)) {
            return;
        }
        int shape_index = -1;
        if (this.trie_ != null) {
            // The trie's class id, rendered as a string, is the shape symbol.
            String shape = Integer.toString(this.trie_.classify(form));
            shape_index = this.shape_table_.toIndex(shape, -1, insert);
        }
        word.setWordShapeIndex(shape_index);
    }

    /**
     * Tells whether a word index counts as rare.
     *
     * @param word word index
     * @return {@code true} if the index lies outside the vocabulary array or its
     *         count is below {@code rare_word_max_freq_}
     */
    public boolean isRare(int word) {
        boolean in_vocabulary = word >= 0 && word < this.vocab_.length;
        return !in_vocabulary || this.vocab_[word] < this.rare_word_max_freq_;
    }

    /**
     * Returns the table that maps normalized word forms to word indexes.
     * The table itself is returned, not a copy.
     */
    public SymbolTable<String> getWordTable() {
        return this.word_table_;
    }

    /**
     * Grid search over several option parameters: trains a tagger for every
     * combination of the given values and returns the one with the highest score.
     *
     * <p>The first parameter is varied here; the remaining ones are handled by
     * recursion, the last one by the single-parameter overload. On ties the
     * earlier tagger is kept.
     *
     * @param options         base options; cloned before each modification
     * @param train_sentences training data
     * @param test_sentences  evaluation data; must not be {@code null}
     * @param parameters      names of the options to vary; not modified
     * @param values_list     candidate values, one list per parameter; not modified
     * @param results         receives the entries recorded by the single-parameter overload
     * @return the best tagger, or {@code null} if no combination could be trained
     * @throws InvalidParameterException if {@code test_sentences} is {@code null}
     */
    public static Tagger trainOptimal(MorphOptions options, Collection<Sequence> train_sentences, Collection<Sequence> test_sentences, List<String> parameters, List<List<String>> values_list, List<MorphEntry> results) {
        if (test_sentences == null) {
            throw new InvalidParameterException("test_sentences is null!");
        }
        assert (parameters.size() == values_list.size());
        assert (!parameters.isEmpty());
        if (parameters.size() == 1) {
            return MorphModel.trainOptimal(options, train_sentences, test_sentences, parameters.get(0), values_list.get(0), results);
        }

        // Work on copies so the caller's lists stay untouched.
        LinkedList<String> remaining_parameters = new LinkedList<String>(parameters);
        LinkedList<List<String>> remaining_values = new LinkedList<List<String>>(values_list);
        String parameter = remaining_parameters.pollFirst();
        List<String> values = remaining_values.pollFirst();

        Tagger best_tagger = null;
        for (String value : values) {
            options = Copy.clone(options);
            options.setProperty(parameter, value);
            Tagger tagger = MorphModel.trainOptimal(options, train_sentences, test_sentences, remaining_parameters, remaining_values, results);
            if (tagger == null) {
                // The nested search had no values to try; nothing to compare.
                continue;
            }
            if (best_tagger == null || tagger.getResult().getScore() > best_tagger.getResult().getScore()) {
                best_tagger = tagger;
            }
        }
        return best_tagger;
    }

    /**
     * Trains one tagger per candidate value of a single option and keeps the best.
     *
     * <p>For every value the options are cloned, the parameter is set, a
     * tagger is trained on a further clone of those options, and the
     * options/result pair is appended to {@code results}.
     *
     * @param options         base options; cloned before every modification
     * @param train_sentences training data
     * @param test_sentences  evaluation data, must not be {@code null}
     * @param parameter       name of the option to vary
     * @param values2         candidate values for {@code parameter}
     * @param results         receives one entry per trained tagger
     * @return the tagger with the strictly highest score (the first one wins
     *         ties), or {@code null} if {@code values2} is empty
     * @throws InvalidParameterException if {@code test_sentences} is {@code null}
     */
    public static Tagger trainOptimal(MorphOptions options, Collection<Sequence> train_sentences, Collection<Sequence> test_sentences, String parameter, Collection<String> values2, List<MorphEntry> results) {
        if (test_sentences == null) {
            throw new InvalidParameterException("test_sentences is null!");
        }
        Tagger best_tagger = null;
        for (String value : values2) {
            options = Copy.clone(options);
            options.setProperty(parameter, value);
            // Train on a separate clone; the instance stored in results is the one configured above.
            Tagger tagger = MorphModel.train(Copy.clone(options), train_sentences, test_sentences);
            results.add(new MorphEntry(options, (MorphResult)tagger.getResult()));
            if (best_tagger == null || tagger.getResult().getScore() > best_tagger.getResult().getScore()) {
                best_tagger = tagger;
            }
        }
        return best_tagger;
    }

    /**
     * Runs the grid search over a fixed grid and reports every configuration.
     *
     * <p>The grid covers {@code order} (1, 3, 5), {@code seed} (41, 42, 43)
     * and {@code penalty} (0.0, 0.05, 0.1, 0.5). After the search the
     * collected entries are sorted and written to {@code System.err}, one
     * line per configuration: the parameter settings, a tab, then the score.
     *
     * @param options         base options for every configuration
     * @param train_sentences training data
     * @param test_sentences  evaluation data, must not be {@code null}
     * @return the tagger selected by the grid search
     * @throws InvalidParameterException if {@code test_sentences} is {@code null}
     */
    public static Tagger trainOptimal(MorphOptions options, List<Sequence> train_sentences, List<Sequence> test_sentences) {
        if (test_sentences == null) {
            throw new InvalidParameterException("test_sentences is null!");
        }
        List<String> grid_names = Arrays.asList("order", "seed", "penalty");
        List<List<String>> grid_values = Arrays.asList(
            Arrays.asList("1", "3", "5"),
            Arrays.asList("41", "42", "43"),
            Arrays.asList("0.0", "0.05", "0.1", "0.5"));
        LinkedList<MorphEntry> entries = new LinkedList<MorphEntry>();
        Tagger best = MorphModel.trainOptimal(options, train_sentences, test_sentences, grid_names, grid_values, entries);
        Collections.sort(entries);
        System.err.println("OPTIMAL OPTIONS AND RESULTS");
        for (MorphEntry entry : entries) {
            StringBuilder line = new StringBuilder();
            String separator = "";
            for (String name : grid_names) {
                line.append(separator).append(name).append(':').append(entry.getOptions().getProperty(name));
                separator = ", ";
            }
            line.append('\t').append(entry.getResult().getScore());
            System.err.println(line.toString());
        }
        return best;
    }

    /**
     * Builds a model from the training data and trains a tagger on it.
     *
     * <p>Steps, in order: initialise a fresh {@code MorphModel}; index the
     * words of the test sentences (if any) without inserting new symbols;
     * initialise the weight vector; create the tagger and trainer; train.
     * When both the lemmatizer and lemma pre-training options are on, the
     * model's {@code skip_lemma_} flag is cleared and a second training pass
     * is run on the same tagger.
     *
     * @param options         training options
     * @param train_sentences training data
     * @param test_sentences  optional evaluation data; when {@code null} no evaluator is used
     * @return the trained tagger
     */
    public static Tagger train(MorphOptions options, Collection<Sequence> train_sentences, Collection<Sequence> test_sentences) {
        boolean has_test = test_sentences != null;
        MorphModel model = new MorphModel();
        model.init(options, train_sentences);
        if (has_test) {
            for (Sequence sentence : test_sentences) {
                for (Token token : sentence) {
                    model.addIndexes((Word)token, false);
                }
            }
        }
        MorphWeightVector weights = new MorphWeightVector(options);
        weights.init(model, train_sentences);
        MorphTagger tagger = new MorphTagger(model, model.getOrder(), weights);
        Trainer trainer = TrainerFactory.create(options);
        MorphEvaluator evaluator = has_test ? new MorphEvaluator(test_sentences) : null;
        trainer.train(tagger, train_sentences, evaluator);
        if (!options.getLemmatizer() || !options.getLemmaPretraining()) {
            return tagger;
        }
        // Second pass: same tagger and data, now with lemma scoring switched on.
        model.skip_lemma_ = false;
        if (options.getVerbose()) {
            System.err.format("Training with lemmatizer.\n", new Object[0]);
        }
        trainer.train(tagger, train_sentences, evaluator);
        return tagger;
    }

    /**
     * Returns the symbol table held in {@code char_table_}.
     *
     * @return the table itself, not a copy
     */
    public SymbolTable<Character> getCharTable() {
        return this.char_table_;
    }

    /**
     * Returns the number of word shapes.
     *
     * @return {@code trie_.getIndex()} when a trie is present, otherwise the
     *         size of {@code shape_table_}
     */
    public int getNumShapes() {
        return this.trie_ != null ? this.trie_.getIndex() : this.shape_table_.size();
    }

    /**
     * Returns the symbol table held in {@code shape_table_}.
     *
     * @return the table itself, not a copy
     */
    public SymbolTable<String> getShapeTable() {
        return this.shape_table_;
    }

    /**
     * Tells whether a word-form index is out of vocabulary.
     *
     * <p>An index outside the bounds of {@code vocab_} (negative, or at or
     * past its length) is reported as out of vocabulary instead of raising
     * {@code ArrayIndexOutOfBoundsException}, matching how
     * {@link #isRare(int)} treats such indexes. An in-range index is out of
     * vocabulary when its {@code vocab_} entry is zero.
     *
     * @param form_index word-form index
     * @return {@code true} if the index is out of range or its entry is zero
     */
    public boolean isOOV(int form_index) {
        if (form_index < 0 || form_index >= this.vocab_.length) {
            return true;
        }
        return this.vocab_[form_index] == 0;
    }

    /**
     * Sums the sizes of all sub-tag symbol tables.
     *
     * @return the combined size of the non-null entries of
     *         {@code subtag_tables_}, or {@code 0} when there are none
     */
    public int getNumSubTags() {
        if (this.subtag_tables_ == null) {
            return 0;
        }
        int count = 0;
        for (SymbolTable<String> subtag_table : this.subtag_tables_) {
            if (subtag_table != null) {
                count += subtag_table.size();
            }
        }
        return count;
    }

    /**
     * Returns the symbol table held in {@code token_feature_table_}.
     *
     * @return the table itself, not a copy
     */
    public SymbolTable<String> getTokenFeatureTable() {
        return this.token_feature_table_;
    }

    /**
     * Returns the symbol table held in {@code weighted_token_feature_table_}.
     *
     * @return the table itself, not a copy
     */
    public SymbolTable<String> getWeightedTokenFeatureTable() {
        return this.weighted_token_feature_table_;
    }

    /**
     * Returns the candidate tag indexes for the next tagging level.
     *
     * <p>The level is {@code 0} when {@code state} is {@code null}, otherwise
     * one above the state's level. Lookup order:
     * <ol>
     *   <li>level 1 with a transition table: the row of {@code transitions_}
     *       for the state's index;</li>
     *   <li>level 0 with {@code restrict_pos_tags_to_seen_combinations_} set
     *       and a non-rare word form: the row of
     *       {@code word_to_observed_tags_} for that form;</li>
     *   <li>otherwise the full row of {@code tag_classes_} for the level.</li>
     * </ol>
     *
     * @param sequence the sentence being tagged
     * @param index    position of the current token in {@code sequence}
     * @param state    state of the level below, or {@code null} at the lowest level
     * @return the candidate array (a shared array, not a copy)
     */
    @Override
    public int[] getTagCandidates(Sequence sequence, int index, State state) {
        int level = 0;
        if (state != null) {
            level = state.getLevel() + 1;
        }
        if (level == 1 && this.transitions_ != null) {
            return this.transitions_[state.getIndex()];
        }
        if (level == 0 && this.restrict_pos_tags_to_seen_combinations_) {
            Word word = (Word)sequence.get(index);
            int form_index = word.getWordFormIndex();
            if (!this.isRare(form_index)) {
                return this.word_to_observed_tags_[form_index];
            }
        }
        return this.tag_classes_[level];
    }

    /**
     * Returns the array held in {@code tag_to_subtag_}.
     *
     * <p>Elsewhere in this class it is indexed as {@code [level][tag index]},
     * yielding an array of sub-tag indexes that may be {@code null}.
     *
     * @return the array itself, not a copy
     */
    public int[][][] getTagToSubTags() {
        return this.tag_to_subtag_;
    }

    /**
     * Sets the {@code verbose_} flag.
     *
     * @param verbose new value of the flag
     */
    public void setVerbose(boolean verbose) {
        this.verbose_ = verbose;
    }

    /**
     * Returns the result of {@code FeatUtil.getMaxSignature} for this model's
     * {@code special_signature_} setting.
     *
     * @return the value computed by {@code FeatUtil.getMaxSignature}
     */
    public int getMaxSignature() {
        return FeatUtil.getMaxSignature(this.special_signature_);
    }

    /**
     * Trains a tagger without evaluation data.
     *
     * <p>Delegates to the three-argument {@code train} with {@code null} as
     * the test sentences.
     *
     * @param options         training options
     * @param train_sequences training data
     * @return the trained tagger
     */
    public static Tagger train(MorphOptions options, List<Sequence> train_sequences) {
        return MorphModel.train(options, train_sequences, null);
    }

    /**
     * Attaches scored lemma candidates to a level-0 state.
     *
     * <p>Does nothing when there is no lemma model or when {@code preprune}
     * differs from {@code lemma_prepruning_extraction_}. Otherwise every
     * candidate of the word's ranker instance is scored with the state's
     * index as the POS index and an empty morph-index array, marked correct
     * when its lemma string equals the lower-cased lemma of the word, and the
     * resulting list is stored on the state, followed by a call to
     * {@code setLemmaScoreSum()}.
     *
     * <p>NOTE(review): {@code word.getLemma()} is dereferenced
     * unconditionally; confirm it can never be {@code null} on this path.
     *
     * @param token    the token being tagged; must be a {@code Word}
     * @param state    the level-0 state receiving the candidates
     * @param preprune extraction phase this call belongs to
     * @param training passed through to instance lookup and feature extraction
     */
    @Override
    public void setLemmaCandidates(Token token, State state, boolean preprune, boolean training) {
        boolean wrong_phase = preprune != this.lemma_prepruning_extraction_;
        if (this.lemma_model_ == null || wrong_phase) {
            return;
        }
        int pos_index = state.getIndex();
        Word word = (Word)token;
        RankerInstance instance = this.getRankerInstance(word, pos_index, training);
        assert (instance != null);
        LemmaCandidateSet candidate_set = instance.getCandidateSet();
        ArrayList<RankerCandidate> scored = new ArrayList<RankerCandidate>(candidate_set.size());
        assert (state.getLevel() == 0);
        String gold_lemma = word.getLemma().toLowerCase();
        for (Map.Entry<String, LemmaCandidate> entry : candidate_set) {
            String candidate_lemma = entry.getKey();
            boolean matches_gold = candidate_lemma.equals(gold_lemma);
            LemmaCandidate lemma_candidate = entry.getValue();
            assert (lemma_candidate != null);
            // No morphological tag is known at this level, hence the empty index array.
            double score = this.getLemmaCandidateScore(lemma_candidate, candidate_set, pos_index, RankerInstance.EMPTY_ARRAY, instance, training);
            RankerCandidate ranked = new RankerCandidate(candidate_lemma, lemma_candidate, matches_gold, score);
            assert (ranked.getCandidate() != null);
            scored.add(ranked);
        }
        state.setLemmaCandidates(scored);
        state.setLemmaScoreSum();
        assert (state.getLemmaCandidates() != null);
    }

    /**
     * Scores one lemma candidate, extracting its features first if needed.
     *
     * <p>While {@code skip_lemma_} is set, the candidate is marked with the
     * {@code RankerInstance.EMPTY_ARRAY} sentinel and the score is {@code 0.0}.
     * Otherwise, a candidate whose feature indexes are missing ({@code null}
     * or the sentinel, compared by reference) triggers feature extraction for
     * the whole candidate set before the lemma model is asked for the score.
     *
     * @param candidate     the candidate to score
     * @param candidate_set the set the candidate belongs to
     * @param pos_index     POS tag index passed to the lemma model
     * @param morph_indexes morphological sub-tag indexes passed to the lemma model
     * @param instance      ranker instance used for feature extraction
     * @param training      passed through to feature extraction
     * @return {@code 0.0} while lemma scoring is skipped, otherwise the lemma model's score
     */
    private double getLemmaCandidateScore(LemmaCandidate candidate, LemmaCandidateSet candidate_set, int pos_index, int[] morph_indexes, RankerInstance instance, boolean training) {
        if (this.skip_lemma_) {
            // Mark the candidate with the sentinel so a later call with scoring enabled re-extracts features.
            candidate.setFeatureIndexes(RankerInstance.EMPTY_ARRAY);
            return 0.0;
        }
        // Reference comparison is intentional: EMPTY_ARRAY acts as a "not extracted yet" marker.
        if (candidate.getFeatureIndexes() == null || candidate.getFeatureIndexes() == RankerInstance.EMPTY_ARRAY) {
            // Reset every sentinel-marked candidate in the set to null before extraction.
            for (Map.Entry<String, LemmaCandidate> entry : candidate_set) {
                if (entry.getValue().getFeatureIndexes() != RankerInstance.EMPTY_ARRAY) continue;
                entry.getValue().setFeatureIndexes(null);
            }
            candidate.setFeatureIndexes(null);
            this.lemma_model_.addIndexes(instance, candidate_set, training);
            // After extraction no candidate may be left without real feature indexes.
            for (Map.Entry<String, LemmaCandidate> entry : candidate_set) {
                assert (entry.getValue().getFeatureIndexes() != null);
                assert (entry.getValue().getFeatureIndexes() != RankerInstance.EMPTY_ARRAY);
            }
            assert (candidate.getFeatureIndexes() != null);
            assert (candidate.getFeatureIndexes() != RankerInstance.EMPTY_ARRAY);
        }
        return this.lemma_model_.score(candidate, pos_index, morph_indexes);
    }

    /**
     * Re-scores the lemma candidates of the sub-level state for a level-1 state.
     *
     * <p>Does nothing when there is no lemma model or when {@code preprune}
     * differs from {@code lemma_prepruning_extraction_}. Otherwise each
     * candidate of the sub-level (level-0) state is scored again, using that
     * state's index as the POS index and the sub-tag indexes looked up for
     * this state's index. The sub-tag indexes are replaced by the empty array
     * when the lookup yields {@code null} or when {@code lemma_use_morph_} is
     * off. The new list is stored on {@code state}, followed by a call to
     * {@code setLemmaScoreSum()}.
     *
     * @param state    the level-1 state receiving the re-scored candidates
     * @param preprune extraction phase this call belongs to
     */
    @Override
    public void setLemmaCandidates(State state, boolean preprune) {
        if (this.lemma_model_ == null || preprune != this.lemma_prepruning_extraction_) {
            return;
        }
        assert (state.getLevel() == 1);
        State pos_state = state.getSubLevelState();
        assert (pos_state != null);
        assert (state != null);
        assert (pos_state.getOrder() == 1);
        assert (state.getOrder() == 1);
        List<RankerCandidate> pos_candidates = pos_state.getLemmaCandidates();
        assert (pos_candidates != null);
        assert (pos_state.getLevel() == 0);
        int pos_index = pos_state.getIndex();
        int morph_tag = state.getIndex();
        // The lookup is always performed, even when its result is discarded below.
        int[] morph_indexes = this.getTagToSubTags()[state.getLevel()][morph_tag];
        if (morph_indexes == null || !this.lemma_use_morph_) {
            morph_indexes = RankerInstance.EMPTY_ARRAY;
        }
        ArrayList<RankerCandidate> rescored = new ArrayList<RankerCandidate>(pos_candidates.size());
        for (RankerCandidate previous : pos_candidates) {
            LemmaCandidate lemma_candidate = previous.getCandidate();
            double score = this.lemma_model_.score(lemma_candidate, pos_index, morph_indexes);
            rescored.add(new RankerCandidate(previous.getLemma(), lemma_candidate, previous.isCorrect(), score));
        }
        state.setLemmaCandidates(rescored);
        state.setLemmaScoreSum();
    }

    /**
     * Returns the lemma ranking model held in {@code lemma_model_}.
     *
     * @return the model, or {@code null} when none is set (other methods of
     *         this class check for that case)
     */
    public RankerModel getLemmaModel() {
        return this.lemma_model_;
    }

    /**
     * Returns the {@code marginalize_lemmas_} flag.
     *
     * <p>The spelling of the method name is fixed by the declaration it overrides.
     *
     * @return the current value of the flag
     */
    @Override
    public boolean getMarganlizeLemmas() {
        return this.marginalize_lemmas_;
    }

    /**
     * Returns the {@code lemma_use_morph_} flag.
     *
     * <p>When the flag is off, level-1 lemma re-scoring in this class uses an
     * empty sub-tag index array.
     *
     * @return the current value of the flag
     */
    public boolean getLemmaUseMorph() {
        return this.lemma_use_morph_;
    }

    /**
     * Pairs the options of one training run with the result it produced.
     *
     * <p>The natural ordering is by descending score, so sorting a list of
     * entries puts the best-scoring configuration first.
     */
    public static class MorphEntry
    implements Comparable<MorphEntry> {
        private final MorphOptions options_;
        private final MorphResult result_;

        /**
         * @param options options the run was trained with
         * @param result  result obtained with those options
         */
        public MorphEntry(MorphOptions options, MorphResult result) {
            this.options_ = options;
            this.result_ = result;
        }

        /**
         * Orders entries by score, highest first.
         *
         * @param o the entry to compare against
         * @return a negative value if this entry has the higher score, a
         *         positive value if it has the lower one, zero if equal
         */
        @Override
        public int compareTo(MorphEntry o) {
            double own_score = this.result_.getScore();
            double other_score = o.result_.getScore();
            // Arguments are swapped on purpose to obtain descending order.
            return Double.compare(other_score, own_score);
        }

        /** @return the options of this run */
        public MorphOptions getOptions() {
            return this.options_;
        }

        /** @return the result of this run */
        public MorphResult getResult() {
            return this.result_;
        }
    }
}

