/*
 * Decompiled with CFR 0.152.
 */
package experimental.morfessor;

import experimental.morfessor.Morpheme;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import marmot.util.FileUtils;

/**
 * Segments a word into tagged morphs with a Viterbi search.
 *
 * <p>Costs are negative natural-log probabilities read from a probabilities
 * file. Two kinds of lines are recognised (lines starting with {@code #} are
 * skipped, all other lines are ignored):
 * <ul>
 *   <li>transition lines matching {@link #TAG_LINE_PATTERN_}, i.e.
 *       {@code P(TAG1 -> TAG2) = p (N = n)};</li>
 *   <li>morph lines matching {@link #MORPH_LINE_PATTERN_}: a morph followed by
 *       whitespace-separated probabilities, one column per tag.</li>
 * </ul>
 *
 * <p>Tag ids are assigned in order of first appearance. The tag with id 0 is
 * used as the word-boundary state: the search only assigns tags with id
 * {@code >= 1} to morphs and uses the transitions from/to tag 0 at the start
 * and end of the word. Emission column {@code c} is looked up for tag id
 * {@code c + 1}. The file must contain a tag named {@code ZZZ}; it is the only
 * tag that may emit an unknown single character.
 */
public class ViterbiDecoder
implements Serializable {
    private static final long serialVersionUID = 1L;
    /** Cost that stands in for probability zero. */
    private static final double LOG_PROB_ZERO_ = 100000.0;
    private static final Pattern TAG_LINE_PATTERN_ = Pattern.compile("P\\(([^ ]+) -> ([^\\)]+)\\) = ([0-9.]+) \\(N = ([0-9]+)\\)");
    private static final Pattern MORPH_LINE_PATTERN_ = Pattern.compile("([^\\s]+)\\s(.+)");
    /** Suffixes tried on every substring when looking it up in the morph lexicon. */
    private static final String[] MORPH_TAILS_ = new String[]{"", "*0", "*1", "*2", "*3", "*4"};
    /** Name of the tag used for unknown material. */
    private static final String UNKNOWN_TAG_ = "ZZZ";
    /** Cutoff used by the one-argument constructor. */
    private static final double DEFAULT_CUTOFF_ = 1.0E-11;
    /** Largest finite emission cost seen while reading the file. */
    private double log_prob_max_;
    private Map<String, Integer> tag_ids_;
    private String[] tag_names_;
    private Map<String, Integer> morph_ids_;
    /** Transition costs indexed {@code [from tag id][to tag id]}. */
    private double[][] transition_probs_;
    /** Emission costs indexed {@code [morph id][column]}. */
    private double[][] emission_probs_;
    private int zzz_index_;

    /**
     * Loads the model from {@code probs_file}.
     *
     * @param probs_file path handed to {@code FileUtils.openFile}
     * @param cutoff     an emission whose expected count (tag count times
     *                   probability) is below this value is treated as impossible
     * @throws RuntimeException         wrapping any {@link IOException} raised while reading
     * @throws IllegalArgumentException if the file defines no {@code ZZZ} tag
     */
    public ViterbiDecoder(String probs_file, double cutoff) {
        this.read(probs_file, cutoff);
    }

    /**
     * Loads the model from {@code probs_file} with a cutoff of {@code 1.0E-11}.
     *
     * @param probs_file path handed to {@code FileUtils.openFile}
     */
    public ViterbiDecoder(String probs_file) {
        this(probs_file, DEFAULT_CUTOFF_);
    }

    /**
     * First pass over the file: assigns ids to all tags and morphs and
     * allocates the cost tables accordingly.
     */
    private void readSymbols(String probsfile) {
        this.tag_ids_ = new HashMap<String, Integer>();
        List<String> tag_names = new ArrayList<String>();
        this.morph_ids_ = new HashMap<String, Integer>();
        try (BufferedReader reader = FileUtils.openFile(probsfile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                Matcher m = TAG_LINE_PATTERN_.matcher(line);
                if (m.matches()) {
                    this.registerTag(m.group(1), tag_names);
                    this.registerTag(m.group(2), tag_names);
                    continue;
                }
                m = MORPH_LINE_PATTERN_.matcher(line);
                if (!m.matches()) {
                    continue;
                }
                String morph = m.group(1);
                // Only the first occurrence gets an id; re-assigning an id to a
                // repeated morph would produce an id outside the emission table
                // or one shared with the next new morph.
                if (!this.morph_ids_.containsKey(morph)) {
                    this.morph_ids_.put(morph, this.morph_ids_.size());
                }
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        this.tag_names_ = tag_names.toArray(new String[0]);
        this.transition_probs_ = new double[this.tag_names_.length][this.tag_names_.length];
        this.emission_probs_ = new double[this.morph_ids_.size()][this.tag_names_.length];
    }

    /** Gives {@code tag} the next free id unless it already has one. */
    private void registerTag(String tag, List<String> tag_names) {
        if (this.tag_ids_.containsKey(tag)) {
            return;
        }
        this.tag_ids_.put(tag, this.tag_ids_.size());
        tag_names.add(tag);
    }

    /**
     * Reads the model: collects the symbols, then fills the transition and
     * emission cost tables in a second pass over the same file.
     */
    private void read(String probsfile, double cutoff) {
        this.readSymbols(probsfile);
        this.log_prob_max_ = 0.0;
        // Sum of the N counts of all transitions leaving each tag.
        int[] ntagged = new int[this.tag_names_.length];
        try (BufferedReader reader = FileUtils.openFile(probsfile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                Matcher m = TAG_LINE_PATTERN_.matcher(line);
                if (m.matches()) {
                    double p = Double.parseDouble(m.group(3));
                    int n = Integer.parseInt(m.group(4));
                    int tagid1 = this.tag_ids_.get(m.group(1));
                    int tagid2 = this.tag_ids_.get(m.group(2));
                    this.transition_probs_[tagid1][tagid2] = p == 0.0 ? LOG_PROB_ZERO_ : -Math.log(p);
                    ntagged[tagid1] += n;
                    continue;
                }
                m = MORPH_LINE_PATTERN_.matcher(line);
                if (!m.matches()) {
                    continue;
                }
                String[] probs = m.group(2).split("\\s");
                int morph_id = this.morph_ids_.get(m.group(1));
                for (int i = 0; i < probs.length; ++i) {
                    double prob = Double.parseDouble(probs[i]);
                    // NOTE(review): split() reads column i for tag id i + 1, but the
                    // count used here is ntagged[i] (tag id i) - confirm which is intended.
                    double noocs = (double)ntagged[i] * prob;
                    double logp;
                    if (noocs < cutoff) {
                        logp = LOG_PROB_ZERO_;
                    } else {
                        logp = -Math.log(prob);
                        if (logp > this.log_prob_max_) {
                            this.log_prob_max_ = logp;
                        }
                    }
                    this.emission_probs_[morph_id][i] = logp;
                }
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        Integer zzz = this.tag_ids_.get(UNKNOWN_TAG_);
        if (zzz == null) {
            throw new IllegalArgumentException("No '" + UNKNOWN_TAG_ + "' tag found in " + probsfile);
        }
        this.zzz_index_ = zzz;
    }

    /**
     * Cost of emitting {@code morph} under tag id {@code tag}. A morph missing
     * from the lexicon is impossible for every tag except {@code ZZZ}, where it
     * costs ten times the largest finite emission cost of the model.
     */
    private double emissionCost(String morph, int tag) {
        Integer morph_id = this.morph_ids_.get(morph);
        if (morph_id == null) {
            return tag == this.zzz_index_ ? 10.0 * this.log_prob_max_ : LOG_PROB_ZERO_;
        }
        return this.emission_probs_[morph_id][tag - 1];
    }

    /**
     * Finds the cheapest segmentation of {@code word} into tagged morphs.
     *
     * <p>Every substring is looked up in the lexicon as is and with each of
     * the tails {@code *0} .. {@code *4} appended. A single character that is
     * not in the lexicon is still considered as an unknown morph; longer
     * unknown substrings are ruled out.
     *
     * @param word the word to segment
     * @return the morphemes in word order, each built from the string
     *         {@code morph/TAG}; if no segmentation with a cost below the
     *         probability-zero cost exists, a single morpheme
     *         {@code word/ZZZ} is returned and a message is written to
     *         {@code System.err}
     */
    public List<Morpheme> split(String word) {
        int word_length = word.length();
        int num_tags = this.tag_ids_.size();
        // All tables are indexed [end position][length of last morph][tag of last morph].
        double[][][] delta = new double[word_length + 1][word_length + 1][num_tags];
        int[][][] psi_prevlen = new int[word_length + 1][word_length + 1][num_tags];
        int[][][] psi_prevtag = new int[word_length + 1][word_length + 1][num_tags];
        String[][][] psi_asterisk = new String[word_length + 1][word_length + 1][num_tags];
        List<String> morphs = new ArrayList<String>();
        for (int position = 1; position <= word_length; ++position) {
            for (int length = 1; length <= position; ++length) {
                int prev_position = position - length;
                String morphnoasterisk = word.substring(prev_position, position);
                morphs.clear();
                for (String tail : MORPH_TAILS_) {
                    String morph = morphnoasterisk + tail;
                    if (this.morph_ids_.containsKey(morph)) {
                        morphs.add(morph);
                    }
                }
                if (morphs.isEmpty()) {
                    if (length == 1) {
                        // Unknown single character: keep it as a candidate.
                        morphs.add(morphnoasterisk);
                    } else {
                        for (int tag = 1; tag < num_tags; ++tag) {
                            delta[position][length][tag] = LOG_PROB_ZERO_;
                            psi_prevlen[position][length][tag] = 0;
                            psi_prevtag[position][length][tag] = 0;
                            psi_asterisk[position][length][tag] = "";
                        }
                        continue;
                    }
                }
                for (int tag = 1; tag < num_tags; ++tag) {
                    double cell_cost = LOG_PROB_ZERO_;
                    int cell_prev_length = -1;
                    int cell_prev_tag = -1;
                    String cell_morph = null;
                    for (String morph : morphs) {
                        double log_prob_morph = this.emissionCost(morph, tag);
                        if (prev_position == 0) {
                            // First morph of the word: transition out of the boundary tag.
                            double cost = this.transition_probs_[0][tag] + log_prob_morph;
                            if (cost <= cell_cost) {
                                cell_cost = cost;
                                cell_prev_length = 0;
                                cell_prev_tag = 0;
                                cell_morph = morph;
                            }
                            continue;
                        }
                        for (int prev_length = 1; prev_length <= prev_position; ++prev_length) {
                            for (int prev_tag = 1; prev_tag < num_tags; ++prev_tag) {
                                double cost = delta[prev_position][prev_length][prev_tag] + this.transition_probs_[prev_tag][tag] + log_prob_morph;
                                if (cost <= cell_cost) {
                                    cell_cost = cost;
                                    cell_prev_length = prev_length;
                                    cell_prev_tag = prev_tag;
                                    cell_morph = morph;
                                }
                            }
                        }
                    }
                    delta[position][length][tag] = cell_cost;
                    psi_prevlen[position][length][tag] = cell_prev_length;
                    psi_prevtag[position][length][tag] = cell_prev_tag;
                    psi_asterisk[position][length][tag] = cell_morph;
                }
            }
        }
        // Pick the best final morph, including the transition into the boundary tag.
        double best_cost = LOG_PROB_ZERO_;
        int best_length = -1;
        int best_tag = -1;
        for (int length = 1; length <= word_length; ++length) {
            for (int tag = 1; tag < num_tags; ++tag) {
                double cost = delta[word_length][length][tag] + this.transition_probs_[tag][0];
                if (cost <= best_cost) {
                    best_cost = cost;
                    best_length = length;
                    best_tag = tag;
                }
            }
        }
        if (best_cost == LOG_PROB_ZERO_) {
            System.err.println("no segmentation with non-zero probability found for word: " + word);
            return Collections.singletonList(new Morpheme(word + "/" + UNKNOWN_TAG_));
        }
        // Follow the back pointers from the end of the word, then restore word order.
        List<Morpheme> morpheme_list = new ArrayList<Morpheme>();
        for (int position = word_length; position > 0; position -= best_length) {
            String morph = psi_asterisk[position][best_length][best_tag];
            morpheme_list.add(new Morpheme(morph + "/" + this.tag_names_[best_tag]));
            int best_prev_length = psi_prevlen[position][best_length][best_tag];
            int best_prev_tag = psi_prevtag[position][best_length][best_tag];
            best_length = best_prev_length;
            best_tag = best_prev_tag;
        }
        Collections.reverse(morpheme_list);
        return morpheme_list;
    }
}

