/*
 * Decompiled with CFR 0.152.
 */
package marmot.morph.signature;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import marmot.core.Sequence;
import marmot.core.Token;
import marmot.morph.Word;
import marmot.morph.io.SentenceReader;
import marmot.morph.signature.Feature;
import marmot.morph.signature.Split;
import marmot.util.Counter;
import marmot.util.FileUtils;
import marmot.util.SymbolTable;

public class Trie
implements Serializable {
    private static final long serialVersionUID = 1L;
    /** Word forms collected at this node via {@link #add}; transient, and set to null by {@link #clear}. */
    protected transient List<String> words_ = new ArrayList<String>();
    /**
     * Tag data parallel to {@link #words_}: one entry per word, each holding one list of tag
     * indexes per context position. Transient, and set to null by {@link #clear}.
     */
    protected transient List<List<List<Integer>>> tags_ = new ArrayList<List<List<Integer>>>();
    /** Per-feature flag: true while the feature may still be tried as a split at this node. */
    private transient boolean[] feature_map_;
    /** Child nodes, or null for a leaf; index 0 is followed when the feature fires, index 1 otherwise. */
    private List<Trie> children_;
    /** Cache for {@link #getEntropy()}; null until it has been computed. */
    private double[] entropy_ = null;
    /** Feature this node branches on; assigned by {@link #split} when the node receives children. */
    private Feature feature_;
    /** Position of this node in its parent's child list; -1 for a node created without a parent. */
    private int child_index_ = -1;
    /** Parent node, or null for a node created without a parent (the root). */
    private Trie parent_ = null;
    /**
     * Assigned by {@link #clear}: the leaf number for leaves, -1 for inner nodes; on the
     * parentless node it is finally overwritten with the total number of leaves.
     */
    private int index_;
    /** Word forms for which {@link #classify} returns -1 without consulting the tree. */
    private Set<String> no_signature_;
    /** When true, {@link #clear} prints a description of every leaf to standard error. */
    private boolean verbose_;

    public Trie(Trie trie, int feature_index, int child_index) {
        this(null, trie.verbose_);
        this.feature_map_ = new boolean[trie.feature_map_.length];
        System.arraycopy(trie.feature_map_, 0, this.feature_map_, 0, this.feature_map_.length);
        this.feature_map_[feature_index] = false;
        this.child_index_ = child_index;
        this.parent_ = trie;
    }

    /**
     * Creates a parentless node.
     *
     * @param no_signature word forms for which {@link #classify} answers -1
     * @param verbose      whether {@link #clear} reports every leaf on standard error
     */
    public Trie(Set<String> no_signature, boolean verbose) {
        verbose_ = verbose;
        no_signature_ = no_signature;
    }

    /**
     * Registers one training word at this node.
     *
     * @param tags tag indexes observed for the word, one inner list per context position
     * @param word the word form
     */
    public void add(List<List<Integer>> tags, String word) {
        words_.add(word);
        tags_.add(tags);
    }

    /**
     * Grows the decision tree below this node and then releases the training data.
     *
     * <p>Every round scores all still-enabled features on the nodes created in the previous
     * round, queues the valid candidate splits, and applies the first queued split whose node
     * is still a leaf. The queue order is whatever {@code Split}'s natural ordering defines.
     * Growing stops after {@code limit - 1} applied splits or when no candidate is left.
     * Finally {@link #clear} numbers the leaves and drops words, tags and feature maps.
     *
     * @param limit upper bound on the round counter, which starts at 1 and grows by one per
     *              applied split (presumably the maximum number of leaves for binary splits)
     * @param vocab word forms handed to the feature construction
     */
    public void split(int limit, Set<String> vocab) {
        this.children_ = null;
        List<Feature> features = this.getFeatures(vocab);
        PriorityQueue<Split> splits = new PriorityQueue<Split>();
        this.feature_map_ = new boolean[features.size()];
        Arrays.fill(this.feature_map_, true);

        // Nodes whose candidate splits have not been scored yet.
        List<Trie> pending = new LinkedList<Trie>();
        pending.add(this);
        for (int num_leaves = 1; num_leaves < limit && !pending.isEmpty(); ++num_leaves) {
            for (Trie candidate : pending) {
                for (int feature_index = 0; feature_index < features.size(); ++feature_index) {
                    if (!candidate.feature_map_[feature_index]) {
                        continue;
                    }
                    Split scored = new Split(features, candidate, feature_index);
                    if (scored.valid_) {
                        splits.add(scored);
                    } else {
                        // An invalid feature is switched off so that children copying this map skip it.
                        candidate.feature_map_[feature_index] = false;
                    }
                }
            }
            pending.clear();

            // Discard queued splits whose node has been split in the meantime.
            Split best = splits.poll();
            while (best != null && !best.trie_.isLeaf()) {
                best = splits.poll();
            }
            if (best == null) {
                break;
            }

            Trie node = best.trie_;
            assert node.children_ == null;
            node.children_ = best.children_;
            node.feature_ = features.get(best.feature_index_);
            for (Trie child : node.children_) {
                pending.add(child);
            }
        }

        // Sanity check: the leaves must partition the words of this node.
        List<Trie> leaves = new LinkedList<Trie>();
        this.getLeafes(leaves);
        int words = 0;
        for (Trie leaf : leaves) {
            words += leaf.words_.size();
        }
        assert this.words_.size() == words;
        this.clear(0);
    }

    /**
     * Builds the binary word features that {@link #split} may branch on.
     *
     * <p>The list contains, in this order: HasDigit, HasLetter, HasUpper, HasLower,
     * Length&gt;1 .. Length&gt;9, one Contains feature per frequent character, one feature per
     * frequent character for each of the last five positions, one per frequent character for
     * each of the first five positions, and LowerIsKnown. A character is frequent when its
     * lower-cased form occurs more than 50 times in the words stored at this node.
     *
     * <p>NOTE(review): the features are anonymous classes and are serialized with the trie.
     * Their generated class names follow declaration order, and javac keeps captured locals
     * in synthetic fields named after them, so reordering the declarations or renaming
     * {@code length_}, {@code C2}, {@code C}, {@code n} or {@code known_lowercase_words}
     * may break loading of previously saved models. Confirm before changing them.
     *
     * @param vocab known word forms; its all-lowercase members back the LowerIsKnown feature
     * @return the feature list, freshly created on every call
     */
    private List<Feature> getFeatures(Set<String> vocab) {
        List<Feature> features = new ArrayList<Feature>();
        features.add(new Feature(){
            private static final long serialVersionUID = 1L;

            @Override
            boolean feature(String word) {
                for (int index = 0; index < word.length(); ++index) {
                    if (Character.isDigit(word.charAt(index))) {
                        return true;
                    }
                }
                return false;
            }

            @Override
            String getName() {
                return "HasDigit";
            }
        });
        features.add(new Feature(){
            private static final long serialVersionUID = 1L;

            @Override
            boolean feature(String word) {
                for (int index = 0; index < word.length(); ++index) {
                    if (Character.isLetter(word.charAt(index))) {
                        return true;
                    }
                }
                return false;
            }

            @Override
            String getName() {
                return "HasLetter";
            }
        });
        features.add(new Feature(){
            private static final long serialVersionUID = 1L;

            @Override
            boolean feature(String word) {
                for (int index = 0; index < word.length(); ++index) {
                    if (Character.isUpperCase(word.charAt(index))) {
                        return true;
                    }
                }
                return false;
            }

            @Override
            String getName() {
                return "HasUpper";
            }
        });
        features.add(new Feature(){
            private static final long serialVersionUID = 1L;

            @Override
            boolean feature(String word) {
                for (int index = 0; index < word.length(); ++index) {
                    if (Character.isLowerCase(word.charAt(index))) {
                        return true;
                    }
                }
                return false;
            }

            @Override
            String getName() {
                return "HasLower";
            }
        });

        // Length thresholds 1..9.
        for (int length = 1; length < 10; ++length) {
            final int length_ = length;
            features.add(new Feature(){
                private static final long serialVersionUID = 1L;

                @Override
                boolean feature(String word) {
                    return word.length() > length_;
                }

                @Override
                String getName() {
                    return "Length>" + length_;
                }
            });
        }

        // Count lower-cased characters over the words stored at this node.
        Counter<Character> alphabet = new Counter<Character>();
        for (String string : this.words_) {
            for (int index = 0; index < string.length(); ++index) {
                char c = Character.toLowerCase(string.charAt(index));
                alphabet.increment(Character.valueOf(c), 1.0);
            }
        }

        // Collect the frequent characters once; the three feature families below share them.
        List<Character> frequent_chars = new ArrayList<Character>();
        for (Map.Entry<?, ?> entry : alphabet.entrySet()) {
            if ((Double) entry.getValue() > 50.0) {
                frequent_chars.add((Character) entry.getKey());
            }
        }

        for (Character frequent : frequent_chars) {
            final char C2 = frequent.charValue();
            features.add(new Feature(){
                private static final long serialVersionUID = 1L;

                @Override
                boolean feature(String word) {
                    for (int index = 0; index < word.length(); ++index) {
                        if (Character.toLowerCase(word.charAt(index)) == C2) {
                            return true;
                        }
                    }
                    return false;
                }

                @Override
                String getName() {
                    return "Contains=" + C2;
                }
            });
        }

        // Character at a fixed distance from the end of the word (1 = last character).
        for (int position = 1; position <= 5; ++position) {
            final int n = position;
            for (Character frequent : frequent_chars) {
                // Declared per iteration so that the anonymous class captures a final value.
                final char C = frequent.charValue();
                features.add(new Feature(){
                    private static final long serialVersionUID = 1L;

                    @Override
                    boolean feature(String word) {
                        int index = word.length() - n;
                        if (index < 0) {
                            return false;
                        }
                        return Character.toLowerCase(word.charAt(index)) == C;
                    }

                    @Override
                    String getName() {
                        return "Char[-" + n + "]=" + C;
                    }
                });
            }
        }

        // Character at a fixed offset from the start of the word (0 = first character).
        for (int position = 0; position < 5; ++position) {
            final int n = position;
            for (Character frequent : frequent_chars) {
                final char C = frequent.charValue();
                features.add(new Feature(){
                    private static final long serialVersionUID = 1L;

                    @Override
                    boolean feature(String word) {
                        int index = n;
                        if (index >= word.length()) {
                            return false;
                        }
                        return Character.toLowerCase(word.charAt(index)) == C;
                    }

                    @Override
                    String getName() {
                        return "Char[" + n + "]=" + C;
                    }
                });
            }
        }

        final HashSet<String> known_lowercase_words = new HashSet<String>();
        for (String word : vocab) {
            if (word.toLowerCase().equals(word)) {
                known_lowercase_words.add(word);
            }
        }
        features.add(new Feature(){
            private static final long serialVersionUID = 1L;

            @Override
            String getName() {
                return "LowerIsKnown";
            }

            @Override
            boolean feature(String word) {
                String lower = word.toLowerCase();
                // A word that is already lower case fires unconditionally.
                if (lower.equals(word)) {
                    return true;
                }
                return known_lowercase_words.contains(lower);
            }
        });
        return features;
    }

    /**
     * Tells whether this node has no children.
     *
     * @return true when no child list has been attached to this node
     */
    public boolean isLeaf() {
        return children_ == null;
    }

    /**
     * Returns the tag entropy of the words stored at this node, one value per context position.
     *
     * <p>For every position the tag indexes of all words are pooled and the entropy of their
     * empirical distribution is computed with the natural logarithm. A position without any
     * tag yields 0. The result is cached, and only once it has been computed completely.
     * The number of positions is taken from the first word's tag lists.
     *
     * @return the cached entropy array (not a copy), or null when no word has been added
     */
    public double[] getEntropy() {
        if (this.entropy_ != null) {
            return this.entropy_;
        }
        if (this.tags_.isEmpty()) {
            return null;
        }
        int K = this.tags_.get(0).size();
        double[] entropies = new double[K];
        for (int k = 0; k < K; ++k) {
            Counter<Integer> counter = new Counter<Integer>();
            for (List<List<Integer>> tag_list : this.tags_) {
                for (int tag : tag_list.get(k)) {
                    counter.increment(tag, 1.0);
                }
            }
            double total = counter.totalCount();
            double entropy = 0.0;
            // Iterated as Object so that this compiles whatever element type counts() declares.
            for (Object value : counter.counts()) {
                double prob = ((Double) value).doubleValue() / total;
                entropy -= prob * Math.log(prob);
            }
            entropies[k] = entropy;
        }
        this.entropy_ = entropies;
        return this.entropy_;
    }

    /**
     * Describes the path from the root to this node.
     *
     * <p>Each step contributes {@code <feature name>=t} when this side is the parent's first
     * child and {@code <feature name>=f} otherwise; steps are joined with commas.
     *
     * @return the path description; the empty string for a node without a parent
     */
    public String signature() {
        if (parent_ == null) {
            return "";
        }
        assert !isLeaf() || feature_ == null;
        StringBuilder path = new StringBuilder();
        path.append(parent_.signature());
        String feature_name = parent_.feature_.getName();
        if (path.length() > 0) {
            path.append(',');
        }
        char outcome = child_index_ == 0 ? 't' : 'f';
        path.append(feature_name).append('=').append(outcome);
        return path.toString();
    }

    /**
     * Maps a word form to the index of the leaf it falls into.
     *
     * @param word the word form to classify
     * @return -1 when the word belongs to the "no signature" set, otherwise the index of the
     *         leaf reached by following the features down the tree
     */
    public int classify(String word) {
        return no_signature_.contains(word) ? -1 : classify_(word);
    }

    /**
     * Walks from this node down to a leaf, taking child 0 where the node's feature fires for
     * {@code word} and child 1 where it does not.
     *
     * @param word the word form to classify
     * @return the index stored at the leaf that was reached
     */
    private int classify_(String word) {
        Trie node = this;
        while (!node.isLeaf()) {
            assert node.feature_ != null;
            int branch = node.feature_.feature(word) ? 0 : 1;
            node = node.children_.get(branch);
        }
        return node.index_;
    }

    /**
     * Appends all leaves of the subtree rooted at this node to {@code leaves}, visiting
     * children in list order.
     *
     * @param leaves list that receives the leaves
     */
    public void getLeafes(List<Trie> leaves) {
        if (!isLeaf()) {
            for (Trie child : children_) {
                child.getLeafes(leaves);
            }
            return;
        }
        leaves.add(this);
    }

    /**
     * Numbers the leaves of this subtree and releases the training data.
     *
     * <p>Leaves receive consecutive indexes starting at {@code index}, in child-list order;
     * inner nodes receive -1. When verbose, every leaf reports its index, entropy, signature
     * and a shortened word list on standard error before its data is dropped. Words, tags
     * and the feature map of every visited node are set to null. A node without a parent
     * finally stores the next free index, i.e. the number handed back to the caller.
     *
     * <p>NOTE(review): when the parentless node is itself a leaf it first gets {@code index}
     * and is then overwritten with {@code index + 1}, so {@link #classify} would report that
     * larger value. Confirm whether callers can encounter an unsplit root.
     *
     * @param index first index to hand out in this subtree
     * @return the next unused index after numbering this subtree
     */
    public int clear(int index) {
        int next = index;
        if (!isLeaf()) {
            index_ = -1;
            for (Trie child : children_) {
                next = child.clear(next);
            }
        } else {
            if (verbose_) {
                System.err.println(next);
                System.err.println(Arrays.toString(getEntropy()));
                System.err.println(signature());
                System.err.println("words " + words_.size() + " " + Split.shorten(words_));
                System.err.println();
            }
            index_ = next;
            next++;
        }
        words_ = null;
        tags_ = null;
        feature_map_ = null;
        if (parent_ == null) {
            index_ = next;
        }
        return next;
    }

    /**
     * Returns the index stored at this node by {@link #clear}.
     *
     * @return the stored index value
     */
    public int getIndex() {
        return index_;
    }

    /**
     * Trains a trie from a training file with the default settings of 20 folds and a single
     * context position.
     *
     * @param trainfile training file specification passed on to the sentence reader
     * @param verbose   whether leaf diagnostics are printed to standard error
     * @return the trained trie
     */
    public static Trie train(String trainfile, boolean verbose) {
        final int default_folds = 20;
        final int default_context = 1;
        return Trie.train(trainfile, verbose, default_folds, default_context);
    }

    /**
     * Reads every sentence the {@code SentenceReader} yields for {@code trainfile} and
     * trains a trie on them.
     *
     * @param trainfile training file specification passed on to the sentence reader
     * @param verbose   whether leaf diagnostics are printed to standard error
     * @param num_folds number of folds used to determine the known vocabulary
     * @param K         number of context positions whose tags are recorded per word
     * @return the trained trie
     */
    public static Trie train(String trainfile, boolean verbose, int num_folds, int K) {
        SentenceReader reader = new SentenceReader(trainfile);
        LinkedList<Sequence> corpus = new LinkedList<Sequence>();
        for (Sequence sentence : reader) {
            corpus.add(sentence);
        }
        return Trie.train(corpus, verbose, num_folds, K);
    }

    /**
     * Trains a trie from sentences with the default settings of 20 folds and a single
     * context position.
     *
     * @param sentences training sentences
     * @param verbose   whether leaf diagnostics are printed to standard error
     * @return the trained trie
     */
    public static Trie train(Collection<Sequence> sentences, boolean verbose) {
        final int default_folds = 20;
        final int default_context = 1;
        return Trie.train(sentences, verbose, default_folds, default_context);
    }

    /**
     * Trains a signature trie from tagged sentences.
     *
     * <p>The sentences are cut into consecutive folds of {@code size / num_folds} sentences;
     * a trailing remainder is merged into the last fold. Word forms that occur in every fold
     * form the known vocabulary: they get no signature ({@link #classify} returns -1 for
     * them). Every other word form is added to the trie with the tag indexes seen at the
     * {@code K} positions {@code i - K/2 .. i - K/2 + K - 1} around each occurrence
     * {@code i}; positions outside the sentence are skipped. The trie is then split with a
     * limit of 100.
     *
     * @param sentences training sentences whose tokens are {@code Word} instances
     * @param verbose   whether leaf diagnostics are printed to standard error
     * @param num_folds number of folds; must be positive and not exceed the sentence count
     * @param K         number of context positions whose tags are recorded per word
     * @return the trained trie
     * @throws IllegalArgumentException if {@code num_folds} is not positive
     * @throws RuntimeException         if there are fewer sentences than folds
     */
    public static Trie train(Collection<Sequence> sentences, boolean verbose, int num_folds, int K) {
        // Reject fold counts that would divide by zero or keep the fold loop from advancing.
        if (num_folds < 1) {
            throw new IllegalArgumentException("num_folds must be positive: " + num_folds);
        }
        if (sentences.size() < num_folds) {
            throw new RuntimeException("Training set is to small: |sentences| = " + sentences.size() + " num folds =" + num_folds);
        }
        int sentences_per_fold = sentences.size() / num_folds;

        // First pass: collect all word forms and register all tags.
        SymbolTable<String> tags = new SymbolTable<String>();
        Set<String> vocab = new HashSet<String>();
        for (Sequence sentence : sentences) {
            for (Token token : sentence) {
                Word word = (Word) token;
                vocab.add(word.getWordForm());
                tags.toIndex(word.getPosTag(), true);
            }
        }

        // Keep only the forms that occur in every fold.
        Set<String> known = new HashSet<String>();
        int start_index = 0;
        while (start_index < sentences.size()) {
            known.clear();
            int end_index = start_index + sentences_per_fold;
            // Merge what would be a final partial fold into the current one.
            if (end_index + sentences_per_fold >= sentences.size()) {
                end_index = sentences.size();
            }
            int index = 0;
            for (Sequence sentence : sentences) {
                if (index >= start_index && index < end_index) {
                    for (Token token : sentence) {
                        known.add(((Word) token).getWordForm());
                    }
                }
                ++index;
            }
            vocab.retainAll(known);
            start_index = end_index;
        }

        // Second pass: record the tag context of every occurrence of a word outside the vocabulary.
        Map<String, List<List<Integer>>> contexts = new HashMap<String, List<List<Integer>>>();
        for (Sequence sentence : sentences) {
            for (int i = 0; i < sentence.size(); ++i) {
                Word word = (Word) sentence.get(i);
                String form = word.getWordForm();
                if (vocab.contains(form)) {
                    continue;
                }
                List<List<Integer>> tag_list = contexts.get(form);
                if (tag_list == null) {
                    tag_list = new ArrayList<List<Integer>>();
                    for (int k = 0; k < K; ++k) {
                        tag_list.add(new ArrayList<Integer>());
                    }
                    contexts.put(form, tag_list);
                }
                for (int k = 0; k < K; ++k) {
                    int shifted_index = i + k - K / 2;
                    if (shifted_index < 0 || shifted_index >= sentence.size()) {
                        continue;
                    }
                    int tag = tags.toIndex(((Word) sentence.get(shifted_index)).getPosTag());
                    tag_list.get(k).add(tag);
                }
            }
        }

        Trie trie = new Trie(vocab, verbose);
        for (Map.Entry<String, List<List<Integer>>> entry : contexts.entrySet()) {
            trie.add(entry.getValue(), entry.getKey());
        }
        trie.split(100, vocab);
        return trie;
    }

    /**
     * Command-line entry point: trains a trie verbosely from the first argument and saves it
     * to the file named by the second. With any other number of arguments the usage line is
     * printed and the process exits with status 1.
     *
     * @param args training file specification and output file name
     */
    public static void main(String[] args) {
        boolean wrong_arity = args.length != 2;
        if (wrong_arity) {
            System.err.println("Usage: Trie form-index=?,tag-index=?,train-file outputfile");
            System.exit(1);
        }
        String train_spec = args[0];
        String output_file = args[1];
        Trie trained = Trie.train(train_spec, true);
        FileUtils.saveToFile((Serializable)trained, output_file);
    }
}

